Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -11,6 +11,8 @@ import sqlite3 
     | 
|
| 11 | 
         
             
            import math
         
     | 
| 12 | 
         
             
            import time
         
     | 
| 13 | 
         
             
            from huggingface_hub import hf_hub_download
         
     | 
| 
         | 
|
| 
         | 
|
| 14 | 
         | 
| 15 | 
         
             
            # 翻译表
         
     | 
| 16 | 
         
             
            SUBJECT_TRANS = {
         
     | 
| 
         @@ -30,8 +32,8 @@ MODEL_TRANS = { 
     | 
|
| 30 | 
         
             
                "still-3-1.5b-preview": "STILL-3-1.5B-Preview",
         
     | 
| 31 | 
         
             
                "deepseek-r1-distill-qwen-32b": "DeepSeek-R1-Distill-Qwen-32B",
         
     | 
| 32 | 
         
             
                "light-r1-7b-ds": "Light-R1-7B-DS",
         
     | 
| 33 | 
         
            -
                 
     | 
| 34 | 
         
            -
                 
     | 
| 35 | 
         
             
                "skywork-or1-32b-preview": "Skywork-OR1-32B-Preview",
         
     | 
| 36 | 
         
             
                "deepscaler-1.5b-preview": "DeepScaler-1.5B-Preview",
         
     | 
| 37 | 
         
             
                "deepseek-r1-distill-qwen-7b": "DeepSeek-R1-Distill-Qwen-7B",
         
     | 
| 
         @@ -44,6 +46,11 @@ MODEL_TRANS = { 
     | 
|
| 44 | 
         
             
                "skywork-or1-math-7b": "Skywork-OR1-Math-7B",
         
     | 
| 45 | 
         
             
                "skywork-or1-7b-preview": "Skywork-OR1-7B-Preview",
         
     | 
| 46 | 
         
             
                "qwen3-30b-a3b": "Qwen3-30B-A3B",
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 47 | 
         
             
                # 添加更多模型映射
         
     | 
| 48 | 
         
             
            }
         
     | 
| 49 | 
         | 
| 
         @@ -72,7 +79,6 @@ class ModelDatabase: 
     | 
|
| 72 | 
         
             
                    self.conn.execute("PRAGMA temp_store = MEMORY")  # 临时表存储在内存中
         
     | 
| 73 | 
         
             
                    self.conn.execute("PRAGMA mmap_size = 8589934592")  # 尝试使用8GB内存映射
         
     | 
| 74 | 
         
             
                    self.conn.row_factory = sqlite3.Row
         
     | 
| 75 | 
         
            -
                    print("Database connection established with optimized parameters")
         
     | 
| 76 | 
         | 
| 77 | 
         
             
                    # 创建索引以加速查询
         
     | 
| 78 | 
         
             
                    self._ensure_indices()
         
     | 
| 
         @@ -96,7 +102,7 @@ class ModelDatabase: 
     | 
|
| 96 | 
         
             
                        cursor.execute("CREATE INDEX IF NOT EXISTS idx_problems_unique_id ON problems(unique_id)")
         
     | 
| 97 | 
         
             
                        cursor.execute("ANALYZE")  # 分析表以优化查询计划
         
     | 
| 98 | 
         
             
                    except Exception as e:
         
     | 
| 99 | 
         
            -
                         
     | 
| 100 | 
         | 
| 101 | 
         
             
                def get_available_models(self):
         
     | 
| 102 | 
         
             
                    """Get list of all available models"""
         
     | 
| 
         @@ -110,8 +116,7 @@ class ModelDatabase: 
     | 
|
| 110 | 
         
             
                        models = [row['model_name'] for row in cursor.fetchall()]
         
     | 
| 111 | 
         
             
                        self._models_cache = models  # 存储到实例缓存
         
     | 
| 112 | 
         
             
                        return models
         
     | 
| 113 | 
         
            -
                    except sqlite3.OperationalError 
     | 
| 114 | 
         
            -
                        print(f"Error in get_available_models: {e}")
         
     | 
| 115 | 
         
             
                        return []
         
     | 
| 116 | 
         | 
| 117 | 
         
             
                def get_available_datasets(self):
         
     | 
| 
         @@ -126,8 +131,7 @@ class ModelDatabase: 
     | 
|
| 126 | 
         
             
                        datasets = [row['dataset'].upper() for row in cursor.fetchall()]
         
     | 
| 127 | 
         
             
                        self._datasets_cache = datasets  # 存储到实例缓存
         
     | 
| 128 | 
         
             
                        return datasets
         
     | 
| 129 | 
         
            -
                    except sqlite3.OperationalError 
     | 
| 130 | 
         
            -
                        print(f"Error in get_available_datasets: {e}")
         
     | 
| 131 | 
         
             
                        return DATASETS
         
     | 
| 132 | 
         | 
| 133 | 
         
             
                def get_model_statistics(self, model_name, dataset):
         
     | 
| 
         @@ -173,8 +177,7 @@ class ModelDatabase: 
     | 
|
| 173 | 
         | 
| 174 | 
         
             
                        self._cache[cache_key] = stats_data
         
     | 
| 175 | 
         
             
                        return stats_data
         
     | 
| 176 | 
         
            -
                    except sqlite3.OperationalError 
     | 
| 177 | 
         
            -
                        print(f"Database error in get_model_statistics: {e}")
         
     | 
| 178 | 
         
             
                        return [["Database Error", "No data available"]]
         
     | 
| 179 | 
         | 
| 180 | 
         
             
                def get_all_model_accuracies(self, dataset):
         
     | 
| 
         @@ -194,8 +197,7 @@ class ModelDatabase: 
     | 
|
| 194 | 
         
             
                        results = [(row['model_name'], row['accuracy']) for row in cursor.fetchall()]
         
     | 
| 195 | 
         
             
                        self._cache[cache_key] = results
         
     | 
| 196 | 
         
             
                        return results
         
     | 
| 197 | 
         
            -
                    except sqlite3.OperationalError 
     | 
| 198 | 
         
            -
                        print(f"Error in get_all_model_accuracies: {e}")
         
     | 
| 199 | 
         
             
                        return []
         
     | 
| 200 | 
         | 
| 201 | 
         
             
                def get_problems_by_model_dataset(self, model_name, dataset):
         
     | 
| 
         @@ -222,8 +224,7 @@ class ModelDatabase: 
     | 
|
| 222 | 
         
             
                        sorted_results = sorted(results, key=lambda x: int(re.search(r'\d+', x[0]).group(0)) if re.search(r'\d+', x[0]) else 0)
         
     | 
| 223 | 
         
             
                        self._cache[cache_key] = sorted_results
         
     | 
| 224 | 
         
             
                        return sorted_results
         
     | 
| 225 | 
         
            -
                    except sqlite3.OperationalError 
     | 
| 226 | 
         
            -
                        print(f"Database error in get_problems_by_model_dataset: {e}")
         
     | 
| 227 | 
         
             
                        return []
         
     | 
| 228 | 
         | 
| 229 | 
         
             
                def get_problem_data(self, model_name, dataset, problem_id):
         
     | 
| 
         @@ -248,8 +249,7 @@ class ModelDatabase: 
     | 
|
| 248 | 
         
             
                                # 转为字典存储,避免SQLite连接依赖
         
     | 
| 249 | 
         
             
                                self._problem_cache[problem_cache_key] = dict(problem)
         
     | 
| 250 | 
         
             
                                problem = self._problem_cache[problem_cache_key]
         
     | 
| 251 | 
         
            -
                        except Exception 
     | 
| 252 | 
         
            -
                            print(f"Error fetching problem data: {e}")
         
     | 
| 253 | 
         
             
                            return None, None
         
     | 
| 254 | 
         | 
| 255 | 
         
             
                    if not problem:
         
     | 
| 
         @@ -279,8 +279,7 @@ class ModelDatabase: 
     | 
|
| 279 | 
         
             
                                responses = [dict(r) for r in responses]
         
     | 
| 280 | 
         
             
                                self._response_cache[resp_cache_key] = responses
         
     | 
| 281 | 
         
             
                            return problem, responses
         
     | 
| 282 | 
         
            -
                        except Exception 
     | 
| 283 | 
         
            -
                            print(f"Error fetching responses: {e}")
         
     | 
| 284 | 
         
             
                            return problem, None
         
     | 
| 285 | 
         
             
                    else:
         
     | 
| 286 | 
         
             
                        # 获取所有模型对此问题的响应
         
     | 
| 
         @@ -305,8 +304,7 @@ class ModelDatabase: 
     | 
|
| 305 | 
         
             
                                responses = [dict(r) for r in responses]
         
     | 
| 306 | 
         
             
                                self._response_cache[resp_cache_key] = responses
         
     | 
| 307 | 
         
             
                            return problem, responses
         
     | 
| 308 | 
         
            -
                        except Exception 
     | 
| 309 | 
         
            -
                            print(f"Error fetching all responses: {e}")
         
     | 
| 310 | 
         
             
                            return problem, None
         
     | 
| 311 | 
         | 
| 312 | 
         
             
                def get_model_responses(self, selected_models, dataset, problem_id):
         
     | 
| 
         @@ -341,36 +339,46 @@ class ModelDatabase: 
     | 
|
| 341 | 
         
             
                    """清除指定部分或全部缓存"""
         
     | 
| 342 | 
         
             
                    if section == 'main' or section is None:
         
     | 
| 343 | 
         
             
                        self._cache = {}
         
     | 
| 344 | 
         
            -
                        print("Cleared main cache")
         
     | 
| 345 | 
         
             
                    if section == 'problem' or section is None:
         
     | 
| 346 | 
         
             
                        self._problem_cache = {}
         
     | 
| 347 | 
         
            -
                        print("Cleared problem cache")
         
     | 
| 348 | 
         
             
                    if section == 'response' or section is None:
         
     | 
| 349 | 
         
             
                        self._response_cache = {}
         
     | 
| 350 | 
         
            -
                        print("Cleared response cache")
         
     | 
| 351 | 
         
             
                    if section == 'models' or section is None:
         
     | 
| 352 | 
         
             
                        if hasattr(self, '_models_cache'):
         
     | 
| 353 | 
         
             
                            self._models_cache = None
         
     | 
| 354 | 
         
             
                        if hasattr(self, '_datasets_cache'):
         
     | 
| 355 | 
         
             
                            self._datasets_cache = None
         
     | 
| 356 | 
         
            -
                        print("Cleared metadata cache")
         
     | 
| 357 | 
         | 
| 358 | 
         
             
                def close(self):
         
     | 
| 359 | 
         
             
                    """关闭数据库连接并释放资源"""
         
     | 
| 360 | 
         
             
                    if hasattr(self, 'conn') and self.conn:
         
     | 
| 361 | 
         
             
                        try:
         
     | 
| 362 | 
         
             
                            self.conn.close()
         
     | 
| 363 | 
         
            -
             
     | 
| 364 | 
         
            -
             
     | 
| 365 | 
         
            -
                            print(f"Error closing database: {e}")
         
     | 
| 366 | 
         | 
| 367 | 
         
             
                    # 清理所有缓存
         
     | 
| 368 | 
         
             
                    self.clear_cache()
         
     | 
| 369 | 
         | 
| 370 | 
         
             
            def format_latex(text):
         
     | 
| 371 | 
         
             
                if text is None: return ""
         
     | 
| 372 | 
         
            -
                 
     | 
| 
         | 
|
| 
         | 
|
| 373 | 
         
             
                text = text.replace('\n', '<br>')
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 374 | 
         
             
                return text
         
     | 
| 375 | 
         | 
| 376 | 
         
             
            def get_gradient_color(accuracy, color_map='RdYlGn'):
         
     | 
| 
         @@ -378,7 +386,7 @@ def get_gradient_color(accuracy, color_map='RdYlGn'): 
     | 
|
| 378 | 
         
             
                    return "#505050" # Default for missing or invalid accuracy
         
     | 
| 379 | 
         
             
                try:
         
     | 
| 380 | 
         
             
                    # 使用更深的颜色映射
         
     | 
| 381 | 
         
            -
                    cmap = plt. 
     | 
| 382 | 
         
             
                    rgba = cmap(float(accuracy))
         
     | 
| 383 | 
         | 
| 384 | 
         
             
                    # 确保颜色足够深以与白色文本形成对比
         
     | 
| 
         @@ -391,8 +399,7 @@ def get_gradient_color(accuracy, color_map='RdYlGn'): 
     | 
|
| 391 | 
         
             
                    # 转回十六进制
         
     | 
| 392 | 
         
             
                    hex_color = mpl.colors.rgb2hex((r, g, b, a))
         
     | 
| 393 | 
         
             
                    return hex_color
         
     | 
| 394 | 
         
            -
                except Exception 
     | 
| 395 | 
         
            -
                    print(f"Error getting gradient color: {e}")
         
     | 
| 396 | 
         
             
                    return "#505050"
         
     | 
| 397 | 
         | 
| 398 | 
         
             
            def get_contrasting_text_color(bg_color):
         
     | 
| 
         @@ -425,135 +432,172 @@ def get_contrasting_text_color(bg_color): 
     | 
|
| 425 | 
         
             
                # 其他颜色根据亮度决定
         
     | 
| 426 | 
         
             
                return "#000" if yiq > 160 else "#fff"
         
     | 
| 427 | 
         | 
| 428 | 
         
            -
            def  
     | 
| 
         | 
|
| 429 | 
         
             
                if sample is None: return ""
         
     | 
| 430 | 
         
             
                sample_dict = dict(sample) if hasattr(sample, 'keys') else sample if isinstance(sample, dict) else {}
         
     | 
| 431 | 
         
            -
                if not sample_dict: return " 
     | 
| 432 | 
         | 
| 433 | 
         
            -
                 
     | 
| 434 | 
         
             
                extracted = sample_dict.get('extracted', '')
         
     | 
| 435 | 
         
            -
                formatted_extracted = format_latex(extracted) if extracted else ""
         
     | 
| 436 | 
         
             
                correctness = sample_dict.get('correctness', 0)
         
     | 
| 437 | 
         
             
                correctness_label = "✓ Correct" if correctness else "✗ Incorrect"
         
     | 
| 438 | 
         
             
                correctness_color = "var(--color-green)" if correctness else "var(--color-red)"
         
     | 
| 439 | 
         
            -
                html = f"<div style='font-size: 0.85em; padding: 10px; border-radius: 8px;' class='dark-mode-compatible dark-mode-bg-secondary'>"
         
     | 
| 440 | 
         | 
| 441 | 
         
            -
                #  
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 442 | 
         
             
                if show_correctness:
         
     | 
| 443 | 
         
            -
                    html += f"<div style='display: flex; align-items: center; margin-bottom: 5px;'>"
         
     | 
| 
         | 
|
| 444 | 
         
             
                    html += f"<span style='color: {correctness_color}; font-weight: bold; margin-right: 10px;'>{correctness_label}</span>"
         
     | 
| 445 | 
         
            -
                     
     | 
| 446 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 447 | 
         
             
                    html += f"</div>"
         
     | 
| 448 | 
         | 
| 449 | 
         
            -
                html += f"<div style='white-space: pre-wrap;' class='math-content'>{formatted_response}</div>"
         
     | 
| 450 | 
         
             
                html += "</div>"
         
     | 
| 451 | 
         
             
                return html
         
     | 
| 452 | 
         | 
| 453 | 
         
            -
            def  
     | 
| 454 | 
         
            -
                 
     | 
| 455 | 
         
            -
                if  
     | 
| 456 | 
         
            -
             
     | 
| 457 | 
         
            -
             
     | 
| 458 | 
         
            -
                    if potential_sort_cols:
         
     | 
| 459 | 
         
            -
                        sort_by = potential_sort_cols[0] # Default to first accuracy column
         
     | 
| 460 | 
         
            -
                    else: # Fallback to index if no accuracy column
         
     | 
| 461 | 
         
            -
                        sort_by = df.index.name if df.index.name else 'unique_id'
         
     | 
| 462 | 
         
            -
                        if sort_by not in df.index.name and sort_by not in df.columns : # df.index.name might be None
         
     | 
| 463 | 
         
            -
                             return None, f"Sort column '{sort_by}' not found in DataFrame."
         
     | 
| 464 | 
         | 
| 465 | 
         
            -
                 
     | 
| 466 | 
         
            -
             
     | 
| 467 | 
         
            -
                      df_sorted = df.sort_values(by=sort_by, ascending=ascending)
         
     | 
| 468 | 
         
            -
                  elif sort_by == df.index.name:
         
     | 
| 469 | 
         
            -
                      df_sorted = df.sort_index(ascending=ascending) # Sorting by index
         
     | 
| 470 | 
         
            -
                  else:
         
     | 
| 471 | 
         
            -
                      df_sorted = df # No sort if column not found and not index
         
     | 
| 472 | 
         
            -
                except KeyError:
         
     | 
| 473 | 
         
            -
                    return None, f"Sort column '{sort_by}' not found."
         
     | 
| 474 | 
         
            -
             
     | 
| 475 | 
         
            -
                df_sorted['problem_id_display'] = df_sorted.index.to_series().apply(lambda x: re.search(r'\d+', str(x)).group(0) if re.search(r'\d+', str(x)) else str(x))
         
     | 
| 476 | 
         
            -
                accuracy_cols_to_plot = [col for col in df_sorted.columns if col.endswith('_accuracy')]
         
     | 
| 477 | 
         
            -
                if not accuracy_cols_to_plot:
         
     | 
| 478 | 
         
            -
                    return None, "No accuracy columns to plot."
         
     | 
| 479 | 
         
            -
             
     | 
| 480 | 
         
            -
                fig, ax = plt.subplots(figsize=(10, max(6, len(df_sorted) * 0.25)))
         
     | 
| 481 | 
         
            -
                cmap = plt.cm.get_cmap('RdYlGn')
         
     | 
| 482 | 
         
            -
                num_models = len(accuracy_cols_to_plot)
         
     | 
| 483 | 
         
            -
                bar_height = 0.8 / num_models if num_models > 0 else 0.8
         
     | 
| 484 | 
         
            -
                y_pos_base = np.arange(len(df_sorted))
         
     | 
| 485 | 
         
            -
             
     | 
| 486 | 
         
            -
                for i, col_name in enumerate(accuracy_cols_to_plot):
         
     | 
| 487 | 
         
            -
                    model_label = col_name.replace('_accuracy', '')
         
     | 
| 488 | 
         
            -
                    accuracies = df_sorted[col_name].fillna(0) # Fill NaN for plotting
         
     | 
| 489 | 
         
            -
                    # Offset y_pos for grouped bars
         
     | 
| 490 | 
         
            -
                    y_pos = y_pos_base - (bar_height * num_models / 2) + (i * bar_height) + (bar_height / 2)
         
     | 
| 491 | 
         
            -
             
     | 
| 492 | 
         
            -
                    bars = ax.barh(y_pos, accuracies, height=bar_height, label=model_label, alpha=0.8)
         
     | 
| 493 | 
         
            -
                    for bar_idx, bar_val in enumerate(accuracies):
         
     | 
| 494 | 
         
            -
                        bar.set_color(cmap(bar_val)) # Color individual bars
         
     | 
| 495 | 
         
            -
                        if bar_val > 0:
         
     | 
| 496 | 
         
            -
                             ax.text(max(0.01, bar_val + 0.01), y_pos[bar_idx], f'{bar_val:.0%}', va='center', ha='left', fontsize=8)
         
     | 
| 497 | 
         | 
| 498 | 
         
            -
                 
     | 
| 499 | 
         
            -
                 
     | 
| 500 | 
         
            -
                 
     | 
| 501 | 
         
            -
                 
     | 
| 502 | 
         
            -
                 
     | 
| 503 | 
         
            -
                 
     | 
| 504 | 
         
            -
                 
     | 
| 505 | 
         
            -
                 
     | 
| 506 | 
         
            -
                 
     | 
| 507 | 
         
            -
                 
     | 
| 508 | 
         
            -
                 
     | 
| 509 | 
         
            -
                return  
     | 
| 510 | 
         | 
| 511 | 
         
            -
            def  
     | 
| 512 | 
         
            -
                 
     | 
| 513 | 
         
            -
                if  
     | 
| 514 | 
         
            -
                     
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 515 | 
         | 
| 516 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 517 | 
         
             
                try:
         
     | 
| 518 | 
         
            -
                     
     | 
| 519 | 
         
            -
             
     | 
| 520 | 
         
            -
             
     | 
| 521 | 
         
            -
                     
     | 
| 522 | 
         
             
                except Exception as e:
         
     | 
| 523 | 
         
            -
                     
     | 
| 524 | 
         
            -
                    return  
     | 
| 525 | 
         | 
| 526 | 
         
            -
             
     | 
| 527 | 
         
            -
             
     | 
| 528 | 
         
            -
             
     | 
| 529 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 530 | 
         | 
| 531 | 
         
            -
             
     | 
| 532 | 
         
            -
             
     | 
| 533 | 
         
            -
                    #  
     | 
| 534 | 
         
            -
                     
     | 
| 535 | 
         
            -
             
     | 
| 536 | 
         
            -
             
     | 
| 537 | 
         
            -
             
     | 
| 538 | 
         
            -
                         
     | 
| 539 | 
         
            -
                        class=\"problem-btn\" 
         
     | 
| 540 | 
         
            -
                        title=\"ID: {pid} - Acc: {acc_pct}%\" 
         
     | 
| 541 | 
         
            -
                        style='background-color: {bg_color}; color: {text_color} !important;
         
     | 
| 542 | 
         
            -
                               border-radius: 4px; padding: 5px; text-align: center; font-size: 0.7em;
         
     | 
| 543 | 
         
            -
                               min-height: 36px; user-select: none; width: 100%;
         
     | 
| 544 | 
         
            -
                               display: flex; flex-direction: column; justify-content: center;
         
     | 
| 545 | 
         
            -
                               overflow: hidden; text-overflow: ellipsis; white-space: nowrap;'>
         
     | 
| 546 | 
         
            -
                        <div style="font-weight: bold; color: {text_color} !important;">{num_display}</div>
         
     | 
| 547 | 
         
            -
                        <div style="color: {text_color} !important;">{acc_pct}%</div>
         
     | 
| 548 | 
         
            -
                    </div>
         
     | 
| 549 | 
         
            -
                    """
         
     | 
| 550 | 
         | 
| 551 | 
         
            -
                 
     | 
| 552 | 
         
            -
             
     | 
| 553 | 
         
            -
             
     | 
| 554 | 
         
            -
             
     | 
| 555 | 
         
            -
             
     | 
| 556 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 557 | 
         | 
| 558 | 
         
             
            def handle_problem_select(problem_id_from_js, current_model_state, current_dataset_state, mode='default'):
         
     | 
| 559 | 
         
             
                global db
         
     | 
| 
         @@ -562,9 +606,6 @@ def handle_problem_select(problem_id_from_js, current_model_state, current_datas 
     | 
|
| 562 | 
         
             
                dataset_name = current_dataset_state.value if hasattr(current_dataset_state, 'value') else current_dataset_state
         
     | 
| 563 | 
         
             
                problem_id = problem_id_from_js.value if hasattr(problem_id_from_js, 'value') else problem_id_from_js
         
     | 
| 564 | 
         | 
| 565 | 
         
            -
                print(f"**************** PYTHON HANDLER CALLED ***************")
         
     | 
| 566 | 
         
            -
                print(f"[Backend] handle_problem_select: problem_id='{problem_id}', length={len(str(problem_id)) if problem_id else 0}")
         
     | 
| 567 | 
         
            -
                
         
     | 
| 568 | 
         
             
                # 处理纯数字输入,构建完整unique_id
         
     | 
| 569 | 
         
             
                if problem_id and problem_id.isdigit():
         
     | 
| 570 | 
         
             
                    # 构建格式:OlymMATH-HARD-0-EN 或类似格式
         
     | 
| 
         @@ -574,26 +615,9 @@ def handle_problem_select(problem_id_from_js, current_model_state, current_datas 
     | 
|
| 574 | 
         
             
                        language, difficulty = parts
         
     | 
| 575 | 
         
             
                        # 构建完整ID
         
     | 
| 576 | 
         
             
                        problem_id = f"OlymMATH-{difficulty}-{problem_id}-{language}"
         
     | 
| 577 | 
         
            -
                        print(f"[Backend] Constructed full problem_id from number: '{problem_id}'")
         
     | 
| 578 | 
         
            -
                
         
     | 
| 579 | 
         
            -
                print(f"[Backend] Raw problem_id received (exact characters):")
         
     | 
| 580 | 
         
            -
                if problem_id:
         
     | 
| 581 | 
         
            -
                    print(f"'{problem_id}'")
         
     | 
| 582 | 
         
            -
                    # Print character by character for debugging
         
     | 
| 583 | 
         
            -
                    print("Character by character:")
         
     | 
| 584 | 
         
            -
                    for i, c in enumerate(str(problem_id)):
         
     | 
| 585 | 
         
            -
                        print(f"  Position {i}: '{c}' (ord={ord(c)})")
         
     | 
| 586 | 
         
            -
                
         
     | 
| 587 | 
         
            -
                print(f"[Backend] model='{model_name}', dataset='{dataset_name}'")
         
     | 
| 588 | 
         
            -
             
     | 
| 589 | 
         
            -
                # Debugging - dump more information about the objects
         
     | 
| 590 | 
         
            -
                print(f"[Debug] problem_id_from_js type: {type(problem_id_from_js)}")
         
     | 
| 591 | 
         
            -
                print(f"[Debug] current_model_state type: {type(current_model_state)}")
         
     | 
| 592 | 
         
            -
                print(f"[Debug] current_dataset_state type: {type(current_dataset_state)}")
         
     | 
| 593 | 
         | 
| 594 | 
         
             
                if not problem_id or not dataset_name:
         
     | 
| 595 | 
         
             
                    error_message = f"Missing data: problem_id='{problem_id}', dataset='{dataset_name}'"
         
     | 
| 596 | 
         
            -
                    print(f"[Backend] {error_message}")
         
     | 
| 597 | 
         
             
                    return "Please fill in all the fields.", "No answer available.", "", gr.State([])
         
     | 
| 598 | 
         | 
| 599 | 
         
             
                # For comparison mode, we might not have a model selected yet
         
     | 
| 
         @@ -604,24 +628,32 @@ def handle_problem_select(problem_id_from_js, current_model_state, current_datas 
     | 
|
| 604 | 
         | 
| 605 | 
         
             
                        if not problem_data:
         
     | 
| 606 | 
         
             
                            error_message = f"Problem data not found: problem_id='{problem_id}', dataset='{dataset_name}'"
         
     | 
| 607 | 
         
            -
                            print(f"[Backend] {error_message}")
         
     | 
| 608 | 
         
             
                            return f"Problem not found: {problem_id}. Please check the ID and try again.", "No answer available.", "", gr.State([])
         
     | 
| 609 | 
         | 
| 610 | 
         
             
                        problem_dict = dict(problem_data)
         
     | 
| 611 | 
         
            -
                         
     | 
| 612 | 
         
            -
                         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 613 | 
         | 
| 614 | 
         
             
                        # For comparison without model, we don't have samples to display
         
     | 
| 615 | 
         
             
                        return problem_content, answer_content, "", gr.State([])
         
     | 
| 616 | 
         
             
                    except Exception as e:
         
     | 
| 617 | 
         
             
                        error_message = f"Database error: {str(e)}"
         
     | 
| 618 | 
         
            -
                        print(f"[Backend] {error_message}")
         
     | 
| 619 | 
         
             
                        return f"Database error occurred. Please try again.", "No answer available.", "", gr.State([])
         
     | 
| 620 | 
         | 
| 621 | 
         
             
                # The regular flow for model-specific data
         
     | 
| 622 | 
         
             
                if not model_name:
         
     | 
| 623 | 
         
             
                    error_message = f"Missing data: model='{model_name}'"
         
     | 
| 624 | 
         
            -
                    print(f"[Backend] {error_message}")
         
     | 
| 625 | 
         
             
                    return "Please fill in all the fields.", "No answer available.", "", gr.State([])
         
     | 
| 626 | 
         | 
| 627 | 
         
             
                # The problem_id from JS should be the full unique_id. No reconstruction needed normally.
         
     | 
| 
         @@ -630,19 +662,29 @@ def handle_problem_select(problem_id_from_js, current_model_state, current_datas 
     | 
|
| 630 | 
         | 
| 631 | 
         
             
                    if not problem_data:
         
     | 
| 632 | 
         
             
                        error_message = f"Problem data not found: problem_id='{problem_id}', model='{model_name}', dataset='{dataset_name}'"
         
     | 
| 633 | 
         
            -
                        print(f"[Backend] {error_message}")
         
     | 
| 634 | 
         
             
                        return f"Problem not found: {problem_id}. Please check the ID and try again.", "No answer available.", "", gr.State([])
         
     | 
| 635 | 
         
             
                except Exception as e:
         
     | 
| 636 | 
         
             
                    error_message = f"Database error: {str(e)}"
         
     | 
| 637 | 
         
            -
                    print(f"[Backend] {error_message}")
         
     | 
| 638 | 
         
             
                    return f"Database error occurred. Please try again.", "No answer available.", "", gr.State([])
         
     | 
| 639 | 
         | 
| 640 | 
         
             
                problem_dict = dict(problem_data)
         
     | 
| 641 | 
         
             
                problem_display_num = re.search(r'\d+', problem_id).group(0) if re.search(r'\d+', problem_id) else problem_id
         
     | 
| 642 | 
         | 
| 643 | 
         
            -
                 
     | 
| 644 | 
         
            -
                 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 645 | 
         | 
| 
         | 
|
| 646 | 
         
             
                if not responses_data:
         
     | 
| 647 | 
         
             
                    samples_grid_html = "<div>No samples available for this problem.</div>"
         
     | 
| 648 | 
         
             
                    # 返回空的样本数据状态
         
     | 
| 
         @@ -720,8 +762,8 @@ def handle_problem_select(problem_id_from_js, current_model_state, current_datas 
     | 
|
| 720 | 
         | 
| 721 | 
         
             
                        samples_grid_html += '</div>'
         
     | 
| 722 | 
         | 
| 723 | 
         
            -
                    # 第三行和第四行 -  
     | 
| 724 | 
         
            -
                    if  
     | 
| 725 | 
         
             
                        # 第三行
         
     | 
| 726 | 
         
             
                        row_samples = displayed_samples[2*samples_per_row:3*samples_per_row]
         
     | 
| 727 | 
         
             
                        if row_samples:
         
     | 
| 
         @@ -787,106 +829,58 @@ def handle_problem_select(problem_id_from_js, current_model_state, current_datas 
     | 
|
| 787 | 
         
             
                    </div>
         
     | 
| 788 | 
         
             
                    """
         
     | 
| 789 | 
         | 
| 790 | 
         
            -
                    print(f"[Backend] Successfully prepared display for problem {problem_id}")
         
     | 
| 791 | 
         
             
                    # 获取第一个样本作为初始样本
         
     | 
| 792 | 
         
             
                    if samples_data:
         
     | 
| 793 | 
         
             
                        # 这样样本会在选择问题后立即显示
         
     | 
| 794 | 
         
            -
                        first_sample = format_sample(samples_data[0])
         
     | 
| 795 | 
         
             
                        return problem_content, answer_content, final_html, gr.State(samples_data)
         
     | 
| 796 | 
         
             
                    else:
         
     | 
| 797 | 
         
             
                        return problem_content, answer_content, final_html, gr.State([])
         
     | 
| 798 | 
         | 
| 799 | 
         
            -
            def  
     | 
| 800 | 
         
            -
                 
     | 
| 801 | 
         
            -
                
         
     | 
| 802 | 
         
            -
             
     | 
| 803 | 
         
            -
                if hasattr(samples_data, 'value'):
         
     | 
| 804 | 
         
            -
                    samples_list = samples_data.value
         
     | 
| 805 | 
         
            -
                    print(f"[Backend] Extracted samples_data from State: length={len(samples_list) if isinstance(samples_list, list) else 'not_list'}")
         
     | 
| 806 | 
         
            -
                else:
         
     | 
| 807 | 
         
            -
                    samples_list = samples_data
         
     | 
| 808 | 
         
            -
                
         
     | 
| 809 | 
         
            -
                # 确保样本编号是整数
         
     | 
| 810 | 
         
            -
                try:
         
     | 
| 811 | 
         
            -
                    sample_idx = int(sample_number)
         
     | 
| 812 | 
         
            -
                except ValueError:
         
     | 
| 813 | 
         
            -
                    return "<div style='color: red; padding: 10px; border: 1px solid red; border-radius: 5px;'>Error: Sample number must be an integer.</div>"
         
     | 
| 814 | 
         
            -
                
         
     | 
| 815 | 
         
            -
                # 确保样本数据存在且为非空列表
         
     | 
| 816 | 
         
            -
                if not samples_list or not isinstance(samples_list, list) or len(samples_list) == 0:
         
     | 
| 817 | 
         
            -
                    return "<div>No sample data available. Please select a problem first.</div>"
         
     | 
| 818 | 
         
            -
                
         
     | 
| 819 | 
         
            -
                # 检查索引是否在有效范围内,如果不在范围内,显示错误消息
         
     | 
| 820 | 
         
            -
                if sample_idx < 0:
         
     | 
| 821 | 
         
            -
                    return f"<div style='color: red; padding: 10px; border: 1px solid red; border-radius: 5px;'>Error: Sample number {sample_idx} is out of range. Valid range is 0 to {len(samples_list) - 1}.</div>"
         
     | 
| 822 | 
         
            -
                
         
     | 
| 823 | 
         
            -
                if sample_idx >= len(samples_list):
         
     | 
| 824 | 
         
            -
                    return f"<div style='color: red; padding: 10px; border: 1px solid red; border-radius: 5px;'>Error: Sample number {sample_idx} is out of range. Valid range is 0 to {len(samples_list) - 1}.</div>"
         
     | 
| 825 | 
         
            -
                
         
     | 
| 826 | 
         
            -
                # 获取所选样本的数据
         
     | 
| 827 | 
         
            -
                try:
         
     | 
| 828 | 
         
            -
                    sample = samples_list[sample_idx]
         
     | 
| 829 | 
         
            -
                    formatted_sample = format_sample(sample)
         
     | 
| 830 | 
         
            -
                    return formatted_sample
         
     | 
| 831 | 
         
            -
                except Exception as e:
         
     | 
| 832 | 
         
            -
                    print(f"[Backend] Error formatting sample: {e}")
         
     | 
| 833 | 
         
            -
                    return f"<div style='color: red; padding: 10px; border: 1px solid red; border-radius: 5px;'>Error displaying sample {sample_idx}: {str(e)}</div>"
         
     | 
| 834 | 
         | 
| 835 | 
         
            -
             
     | 
| 836 | 
         
            -
                """处理并显示第一个样本(索引0)"""
         
     | 
| 837 | 
         
            -
                # 确保从Gradio State对象中提取实际值
         
     | 
| 838 | 
         
            -
                if hasattr(samples_data, 'value'):
         
     | 
| 839 | 
         
            -
                    samples_list = samples_data.value
         
     | 
| 840 | 
         
            -
                else:
         
     | 
| 841 | 
         
            -
                    samples_list = samples_data
         
     | 
| 842 | 
         
            -
                
         
     | 
| 843 | 
         
            -
                # 检查样本数据是否存在
         
     | 
| 844 | 
         
            -
                if not samples_list or not isinstance(samples_list, list) or len(samples_list) == 0:
         
     | 
| 845 | 
         
            -
                    return "<div>No sample data available. Please select the problem and dataset first.</div>"
         
     | 
| 846 | 
         
            -
                
         
     | 
| 847 | 
         
            -
                # 直接获取第一个样本,避免错误处理逻辑
         
     | 
| 848 | 
         
             
                try:
         
     | 
| 849 | 
         
            -
                     
     | 
| 850 | 
         
            -
             
     | 
| 851 | 
         
            -
             
     | 
| 
         | 
|
| 852 | 
         
             
                except Exception as e:
         
     | 
| 853 | 
         
            -
                     
     | 
| 854 | 
         
            -
                    return f"<div style='color: red; padding: 10px; border: 1px solid red; border-radius: 5px;'>Error displaying first sample: {str(e)}</div>"
         
     | 
| 855 | 
         | 
| 856 | 
         
            -
             
     | 
| 857 | 
         
            -
             
     | 
| 858 | 
         
            -
             
     | 
| 859 | 
         
            -
             
     | 
| 860 | 
         
            -
                dataset_name = dataset_state.value if hasattr(dataset_state, 'value') else dataset_state
         
     | 
| 861 | 
         
            -
                problem_id_value = problem_id.value if hasattr(problem_id, 'value') else problem_id
         
     | 
| 862 | 
         
            -
                
         
     | 
| 863 | 
         
            -
                if not problem_id_value or not dataset_name:
         
     | 
| 864 | 
         
            -
                    return "Please select a dataset and enter a problem ID.", "No answer available."
         
     | 
| 865 | 
         | 
| 866 | 
         
            -
             
     | 
| 867 | 
         
            -
             
     | 
| 868 | 
         
            -
                    #  
     | 
| 869 | 
         
            -
                     
     | 
| 870 | 
         
            -
             
     | 
| 871 | 
         
            -
             
     | 
| 872 | 
         
            -
             
     | 
| 873 | 
         
            -
                         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 874 | 
         | 
| 875 | 
         
            -
                 
     | 
| 876 | 
         
            -
             
     | 
| 877 | 
         
            -
             
     | 
| 878 | 
         
            -
             
     | 
| 879 | 
         
            -
             
     | 
| 880 | 
         
            -
             
     | 
| 881 | 
         
            -
                        
         
     | 
| 882 | 
         
            -
                    problem_dict = dict(problem_data)
         
     | 
| 883 | 
         
            -
                    problem_content = f"{format_latex(problem_dict.get('problem', ''))}"
         
     | 
| 884 | 
         
            -
                    answer_content = f"{format_latex(problem_dict.get('answer', ''))}"
         
     | 
| 885 | 
         
            -
                    
         
     | 
| 886 | 
         
            -
                    return problem_content, answer_content
         
     | 
| 887 | 
         
            -
                except Exception as e:
         
     | 
| 888 | 
         
            -
                    print(f"[Backend] Error in handle_comparison_problem_update: {e}")
         
     | 
| 889 | 
         
            -
                    return f"Error: {str(e)}", "No answer available."
         
     | 
| 890 | 
         | 
| 891 | 
         
             
            def create_ui(db_path):
         
     | 
| 892 | 
         
             
                global db
         
     | 
| 
         @@ -896,7 +890,7 @@ def create_ui(db_path): 
     | 
|
| 896 | 
         
             
                if not AVAILABLE_DATASETS:
         
     | 
| 897 | 
         
             
                    AVAILABLE_DATASETS = ["EN-HARD", "EN-EASY", "ZH-HARD", "ZH-EASY"] # Fallback
         
     | 
| 898 | 
         | 
| 899 | 
         
            -
                #  
     | 
| 900 | 
         
             
                custom_css = """
         
     | 
| 901 | 
         
             
                .padding.svelte-phx28p { padding: unset !important; }
         
     | 
| 902 | 
         
             
                body, .gradio-container { font-family: sans-serif; font-size: 0.95em; line-height: 1.6; }
         
     | 
| 
         @@ -904,6 +898,7 @@ def create_ui(db_path): 
     | 
|
| 904 | 
         
             
                .sample-btn:hover { transform: translateY(-1px); box-shadow: 0 2px 5px rgba(0,0,0,0.1); }
         
     | 
| 905 | 
         
             
                .problem-grid-container { overflow-y: auto; }
         
     | 
| 906 | 
         
             
                .math-content { overflow-x: auto; padding: 5px; }
         
     | 
| 
         | 
|
| 907 | 
         
             
                h1, h2, h3, h4, h5 { margin-top: 0.8em; margin-bottom: 0.4em; color: var(--color-text); }
         
     | 
| 908 | 
         
             
                .gradio-tabs > div[role='tablist'] button { font-size: 0.9em; padding: 8px 12px; }
         
     | 
| 909 | 
         
             
                .gr-dropdown select { font-size: 0.9em; }
         
     | 
| 
         @@ -929,49 +924,50 @@ def create_ui(db_path): 
     | 
|
| 929 | 
         
             
                    //border-radius: 8px;
         
     | 
| 930 | 
         
             
                    //margin-top: 10px;
         
     | 
| 931 | 
         
             
                }
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 932 | 
         
             
                """
         
     | 
| 933 | 
         | 
| 934 | 
         
             
                with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as demo:
         
     | 
| 935 | 
         
            -
                    #  
     | 
| 936 | 
         
            -
                    gr.HTML(""" 
         
     | 
| 937 | 
         
            -
                    <script>
         
     | 
| 938 | 
         
            -
                    // Function to load MathJax
         
     | 
| 939 | 
         
            -
                    function loadMathJax() {
         
     | 
| 940 | 
         
            -
                        if (window.MathJax) return;
         
     | 
| 941 | 
         
            -
                        window.MathJax = {
         
     | 
| 942 | 
         
            -
                            tex: {
         
     | 
| 943 | 
         
            -
                                inlineMath: [['$', '$'], ['\\(', '\\)']],
         
     | 
| 944 | 
         
            -
                                displayMath: [['$$', '$$'], ['\\[', '\\]']],
         
     | 
| 945 | 
         
            -
                                processEscapes: true, tags: 'ams'
         
     | 
| 946 | 
         
            -
                            },
         
     | 
| 947 | 
         
            -
                            svg: { fontCache: 'global' },
         
     | 
| 948 | 
         
            -
                            startup: {
         
     | 
| 949 | 
         
            -
                                ready: () => {
         
     | 
| 950 | 
         
            -
                                    window.MathJax.startup.defaultReady();
         
     | 
| 951 | 
         
            -
                                    window.typesetMath = (elements) => {
         
     | 
| 952 | 
         
            -
                                        if (window.MathJax && window.MathJax.startup && window.MathJax.startup.document && window.MathJax.startup.document.source && window.MathJax.startup.document.source.typeset) {
         
     | 
| 953 | 
         
            -
                                            window.MathJax.startup.document.source.typeset(elements);
         
     | 
| 954 | 
         
            -
                                        }
         
     | 
| 955 | 
         
            -
                                    };
         
     | 
| 956 | 
         
            -
                                }
         
     | 
| 957 | 
         
            -
                            }
         
     | 
| 958 | 
         
            -
                        };
         
     | 
| 959 | 
         
            -
                        const script = document.createElement('script');
         
     | 
| 960 | 
         
            -
                        script.src = 'https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js';
         
     | 
| 961 | 
         
            -
                        script.async = true; script.id = 'MathJax-script';
         
     | 
| 962 | 
         
            -
                        document.head.appendChild(script);
         
     | 
| 963 | 
         
            -
                    }
         
     | 
| 964 | 
         
            -
                    if (document.readyState === 'loading') {
         
     | 
| 965 | 
         
            -
                        document.addEventListener('DOMContentLoaded', loadMathJax);
         
     | 
| 966 | 
         
            -
                    } else {
         
     | 
| 967 | 
         
            -
                        loadMathJax();
         
     | 
| 968 | 
         
            -
                    }
         
     | 
| 969 | 
         
            -
                    </script>
         
     | 
| 970 | 
         
            -
                    """)
         
     | 
| 971 | 
         
            -
                    
         
     | 
| 972 | 
         
            -
                    # Hidden Textbox to communicate Problem ID from JS to Python
         
     | 
| 973 | 
         
            -
                    # IMPORTANT: elem_id must match what JavaScript's document.getElementById uses.
         
     | 
| 974 | 
         
            -
                    
         
     | 
| 975 | 
         | 
| 976 | 
         
             
                    current_dataset_state = gr.State(value=AVAILABLE_DATASETS[0] if AVAILABLE_DATASETS else "")
         
     | 
| 977 | 
         
             
                    current_model_state = gr.State(value=None)
         
     | 
| 
         @@ -1036,9 +1032,25 @@ def create_ui(db_path): 
     | 
|
| 1036 | 
         
             
                                with gr.Column(scale=3, min_width=400):
         
     | 
| 1037 | 
         
             
                                    with gr.Tabs():
         
     | 
| 1038 | 
         
             
                                        with gr.TabItem("Problem Statement"):
         
     | 
| 1039 | 
         
            -
                                            problem_markdown_output = gr.Markdown( 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1040 | 
         
             
                                        with gr.TabItem("Reference Answer"):
         
     | 
| 1041 | 
         
            -
                                            answer_markdown_output = gr.Markdown( 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1042 | 
         | 
| 1043 | 
         
             
                                    # 样本网格
         
     | 
| 1044 | 
         
             
                                    samples_grid_output = gr.HTML("")
         
     | 
| 
         @@ -1056,11 +1068,23 @@ def create_ui(db_path): 
     | 
|
| 1056 | 
         
             
                                            every=0.5
         
     | 
| 1057 | 
         
             
                                        )
         
     | 
| 1058 | 
         | 
| 1059 | 
         
            -
                                    # 样本内容显示区域
         
     | 
| 1060 | 
         
            -
                                     
     | 
| 1061 | 
         
             
                                        value="<div>Select a problem first to view samples.</div>",
         
     | 
| 1062 | 
         
            -
                                        elem_classes=" 
     | 
| 1063 | 
         
            -
                                        elem_id="sample- 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1064 | 
         
             
                                    )
         
     | 
| 1065 | 
         | 
| 1066 | 
         
             
                        with gr.TabItem("Model Comparison"):
         
     | 
| 
         @@ -1088,9 +1112,25 @@ def create_ui(db_path): 
     | 
|
| 1088 | 
         
             
                                with gr.Column(scale=1):
         
     | 
| 1089 | 
         
             
                                    with gr.Tabs():
         
     | 
| 1090 | 
         
             
                                        with gr.TabItem("Problem Statement"):
         
     | 
| 1091 | 
         
            -
                                            comp_problem_markdown_output = gr.Markdown( 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1092 | 
         
             
                                        with gr.TabItem("Reference Answer"):
         
     | 
| 1093 | 
         
            -
                                            comp_answer_markdown_output = gr.Markdown( 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1094 | 
         | 
| 1095 | 
         
             
                            # 左右两部分模型比较
         
     | 
| 1096 | 
         
             
                            with gr.Row(variant='compact'):
         
     | 
| 
         @@ -1122,11 +1162,23 @@ def create_ui(db_path): 
     | 
|
| 1122 | 
         
             
                                            every=0.5
         
     | 
| 1123 | 
         
             
                                        )
         
     | 
| 1124 | 
         | 
| 1125 | 
         
            -
                                    # 样本内容显示区域
         
     | 
| 1126 | 
         
            -
                                     
     | 
| 1127 | 
         
             
                                        value="<div>Select a problem first to view samples.</div>",
         
     | 
| 1128 | 
         
            -
                                        elem_classes=" 
     | 
| 1129 | 
         
            -
                                        elem_id="comp-sample- 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1130 | 
         
             
                                    )
         
     | 
| 1131 | 
         | 
| 1132 | 
         
             
                                # 右侧模型
         
     | 
| 
         @@ -1157,11 +1209,23 @@ def create_ui(db_path): 
     | 
|
| 1157 | 
         
             
                                            every=0.5
         
     | 
| 1158 | 
         
             
                                        )
         
     | 
| 1159 | 
         | 
| 1160 | 
         
            -
                                    # 样本内容显示区域
         
     | 
| 1161 | 
         
            -
                                     
     | 
| 1162 | 
         
             
                                        value="<div>Select a problem first to view samples.</div>",
         
     | 
| 1163 | 
         
            -
                                        elem_classes=" 
     | 
| 1164 | 
         
            -
                                        elem_id="comp-sample- 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1165 | 
         
             
                                    )
         
     | 
| 1166 | 
         | 
| 1167 | 
         
             
                    # --- Event Handlers --- 
         
     | 
| 
         @@ -1201,7 +1265,6 @@ def create_ui(db_path): 
     | 
|
| 1201 | 
         
             
                               gr.Dropdown(choices=comp_model_choices if comp_model_choices else [], value=None)
         
     | 
| 1202 | 
         | 
| 1203 | 
         
             
                    def update_problem_grid_and_stats(selected_model_formatted, selected_dataset, mode='default'):
         
     | 
| 1204 | 
         
            -
                        print(f"[Backend] update_problem_grid_and_stats: model_formatted='{selected_model_formatted}', dataset='{selected_dataset}'")
         
     | 
| 1205 | 
         
             
                        if not selected_model_formatted or not selected_dataset:
         
     | 
| 1206 | 
         
             
                            # Return empty/default values for all outputs, including the state
         
     | 
| 1207 | 
         
             
                            return gr.DataFrame(value=[]), gr.HTML("<div>Please select a model and dataset first.</div>"), None
         
     | 
| 
         @@ -1219,7 +1282,6 @@ def create_ui(db_path): 
     | 
|
| 1219 | 
         
             
                        problem_list = db.get_problems_by_model_dataset(model_name, selected_dataset)
         
     | 
| 1220 | 
         
             
                        grid_html = create_problem_grid_html(problem_list, mode=mode)
         
     | 
| 1221 | 
         | 
| 1222 | 
         
            -
                        print(f"[Backend] update_problem_grid_and_stats: New model_name for state: {model_name}")
         
     | 
| 1223 | 
         
             
                        # Correctly return the actual value for the current_model_state output
         
     | 
| 1224 | 
         
             
                        return gr.DataFrame(value=stats_data), gr.HTML(value=grid_html), model_name
         
     | 
| 1225 | 
         | 
| 
         @@ -1238,9 +1300,9 @@ def create_ui(db_path): 
     | 
|
| 1238 | 
         
             
                        inputs=[],
         
     | 
| 1239 | 
         
             
                        outputs=[sample_number_input]
         
     | 
| 1240 | 
         
             
                    ).then(
         
     | 
| 1241 | 
         
            -
                        lambda: ("Please fill in all the fields.", "No answer available.", "", gr.State([]), "<div>Select a problem first to view samples.</div>"),
         
     | 
| 1242 | 
         
             
                        inputs=[],
         
     | 
| 1243 | 
         
            -
                        outputs=[problem_markdown_output, answer_markdown_output, samples_grid_output, current_samples_data_state,  
     | 
| 1244 | 
         
             
                    )
         
     | 
| 1245 | 
         | 
| 1246 | 
         
             
                    # Initial population of model dropdowns based on default dataset
         
     | 
| 
         @@ -1253,9 +1315,9 @@ def create_ui(db_path): 
     | 
|
| 1253 | 
         
             
                        inputs=[current_dataset_state],
         
     | 
| 1254 | 
         
             
                        outputs=[model_stats_df, problem_grid_html_output, current_dataset_state]
         
     | 
| 1255 | 
         
             
                    ).then(
         
     | 
| 1256 | 
         
            -
                        lambda: ("Please fill in all the fields.", "No answer available.", "", gr.State([]), "<div>Select a problem first to view samples.</div>"),
         
     | 
| 1257 | 
         
             
                        inputs=[],
         
     | 
| 1258 | 
         
            -
                        outputs=[problem_markdown_output, answer_markdown_output, samples_grid_output, current_samples_data_state,  
     | 
| 1259 | 
         
             
                    ).then(
         
     | 
| 1260 | 
         
             
                        # 重置Sample Number为0
         
     | 
| 1261 | 
         
             
                        fn=lambda: "0",
         
     | 
| 
         @@ -1319,12 +1381,12 @@ def create_ui(db_path): 
     | 
|
| 1319 | 
         
             
                            problem_content, answer_content, samples_grid_html, new_samples_data = handle_problem_select_comparison(current_problem_id, new_model_state, current_dataset)
         
     | 
| 1320 | 
         | 
| 1321 | 
         
             
                            # 获取第一个样本的内容
         
     | 
| 1322 | 
         
            -
                             
     | 
| 1323 | 
         | 
| 1324 | 
         
            -
                            return grid_html, new_model_state, problem_content, answer_content, samples_grid_html, new_samples_data,  
     | 
| 1325 | 
         
             
                        else:
         
     | 
| 1326 | 
         
             
                            # 没有问题ID,只返回更新的模型状态
         
     | 
| 1327 | 
         
            -
                            return grid_html, new_model_state, "Please enter a problem ID.", "No answer available.", "", gr.State([]), "<div>Select a problem first to view samples.</div>"
         
     | 
| 1328 | 
         | 
| 1329 | 
         
             
                    # 修改model_dropdown的处理函数,以重新查询当前问题响应 - 比较页面右侧
         
     | 
| 1330 | 
         
             
                    def update_model_and_requery_problem_right(model_dropdown_value, current_dataset, current_problem_id):
         
     | 
| 
         @@ -1337,18 +1399,18 @@ def create_ui(db_path): 
     | 
|
| 1337 | 
         
             
                            _, _, samples_grid_html, new_samples_data = handle_problem_select_comparison(current_problem_id, new_model_state, current_dataset)
         
     | 
| 1338 | 
         | 
| 1339 | 
         
             
                            # 获取第一个样本的内容
         
     | 
| 1340 | 
         
            -
                             
     | 
| 1341 | 
         | 
| 1342 | 
         
            -
                            return grid_html, new_model_state, samples_grid_html, new_samples_data,  
     | 
| 1343 | 
         
             
                        else:
         
     | 
| 1344 | 
         
             
                            # 没有问题ID,只返回更新的模型状态
         
     | 
| 1345 | 
         
            -
                            return grid_html, new_model_state, "", gr.State([]), "<div>Select a problem first to view samples.</div>"
         
     | 
| 1346 | 
         | 
| 1347 | 
         
             
                    # 左侧模型选择事件
         
     | 
| 1348 | 
         
             
                    comp_model_dropdown_left.change(
         
     | 
| 1349 | 
         
             
                        fn=update_model_and_requery_problem_left,
         
     | 
| 1350 | 
         
             
                        inputs=[comp_model_dropdown_left, comp_dataset_state, comp_problem_state_input],
         
     | 
| 1351 | 
         
            -
                        outputs=[comp_problem_grid_html_output_left, comp_model_state_left, comp_problem_markdown_output, comp_answer_markdown_output, comp_samples_grid_output_left, comp_samples_data_state_left,  
     | 
| 1352 | 
         
             
                    ).then(
         
     | 
| 1353 | 
         
             
                        # 重置Sample Number为0
         
     | 
| 1354 | 
         
             
                        fn=lambda: "0",
         
     | 
| 
         @@ -1360,7 +1422,7 @@ def create_ui(db_path): 
     | 
|
| 1360 | 
         
             
                    comp_model_dropdown_right.change(
         
     | 
| 1361 | 
         
             
                        fn=update_model_and_requery_problem_right,
         
     | 
| 1362 | 
         
             
                        inputs=[comp_model_dropdown_right, comp_dataset_state, comp_problem_state_input],
         
     | 
| 1363 | 
         
            -
                        outputs=[comp_problem_grid_html_output_right, comp_model_state_right, comp_samples_grid_output_right, comp_samples_data_state_right,  
     | 
| 1364 | 
         
             
                    ).then(
         
     | 
| 1365 | 
         
             
                        # 重置Sample Number为0
         
     | 
| 1366 | 
         
             
                        fn=lambda: "0",
         
     | 
| 
         @@ -1368,7 +1430,21 @@ def create_ui(db_path): 
     | 
|
| 1368 | 
         
             
                        outputs=[comp_sample_number_input_right]
         
     | 
| 1369 | 
         
             
                    )
         
     | 
| 1370 | 
         | 
| 1371 | 
         
            -
                    #  
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1372 | 
         
             
                    comp_problem_state_input.change(
         
     | 
| 1373 | 
         
             
                        fn=handle_problem_select_comparison,
         
     | 
| 1374 | 
         
             
                        inputs=[comp_problem_state_input, comp_model_state_left, comp_dataset_state],
         
     | 
| 
         @@ -1381,7 +1457,7 @@ def create_ui(db_path): 
     | 
|
| 1381 | 
         
             
                    ).then(
         
     | 
| 1382 | 
         
             
                        fn=handle_first_sample,
         
     | 
| 1383 | 
         
             
                        inputs=[comp_samples_data_state_left],
         
     | 
| 1384 | 
         
            -
                        outputs=[ 
     | 
| 1385 | 
         
             
                    )
         
     | 
| 1386 | 
         | 
| 1387 | 
         
             
                    # 问题选择事件 - 右侧模型
         
     | 
| 
         @@ -1397,25 +1473,10 @@ def create_ui(db_path): 
     | 
|
| 1397 | 
         
             
                    ).then(
         
     | 
| 1398 | 
         
             
                        fn=handle_first_sample,
         
     | 
| 1399 | 
         
             
                        inputs=[comp_samples_data_state_right],
         
     | 
| 1400 | 
         
            -
                        outputs=[ 
     | 
| 1401 | 
         
            -
                    )
         
     | 
| 1402 | 
         
            -
                    
         
     | 
| 1403 | 
         
            -
                    # 左侧样本选择
         
     | 
| 1404 | 
         
            -
                    comp_sample_number_input_left.change(
         
     | 
| 1405 | 
         
            -
                        fn=handle_sample_select,
         
     | 
| 1406 | 
         
            -
                        inputs=[comp_sample_number_input_left, comp_samples_data_state_left],
         
     | 
| 1407 | 
         
            -
                        outputs=[comp_sample_content_output_left]
         
     | 
| 1408 | 
         
            -
                    )
         
     | 
| 1409 | 
         
            -
                    
         
     | 
| 1410 | 
         
            -
                    # 右侧样本选择
         
     | 
| 1411 | 
         
            -
                    comp_sample_number_input_right.change(
         
     | 
| 1412 | 
         
            -
                        fn=handle_sample_select,
         
     | 
| 1413 | 
         
            -
                        inputs=[comp_sample_number_input_right, comp_samples_data_state_right],
         
     | 
| 1414 | 
         
            -
                        outputs=[comp_sample_content_output_right]
         
     | 
| 1415 | 
         
             
                    )
         
     | 
| 1416 | 
         | 
| 1417 | 
         
             
                    # This is the crucial link: problem_state_input is changed by user, triggers this Python callback.
         
     | 
| 1418 | 
         
            -
                    print("Setting up problem_state_input change handler...")
         
     | 
| 1419 | 
         
             
                    problem_state_input.change(
         
     | 
| 1420 | 
         
             
                        fn=handle_problem_select,
         
     | 
| 1421 | 
         
             
                        inputs=[problem_state_input, current_model_state, current_dataset_state],
         
     | 
| 
         @@ -1428,7 +1489,7 @@ def create_ui(db_path): 
     | 
|
| 1428 | 
         
             
                    ).then(
         
     | 
| 1429 | 
         
             
                        fn=handle_first_sample,
         
     | 
| 1430 | 
         
             
                        inputs=[current_samples_data_state],
         
     | 
| 1431 | 
         
            -
                        outputs=[ 
     | 
| 1432 | 
         
             
                    )
         
     | 
| 1433 | 
         | 
| 1434 | 
         
             
                    # Also listen for direct input event which may be more reliable than change
         
     | 
| 
         @@ -1444,20 +1505,20 @@ def create_ui(db_path): 
     | 
|
| 1444 | 
         
             
                    ).then(
         
     | 
| 1445 | 
         
             
                        fn=handle_first_sample,
         
     | 
| 1446 | 
         
             
                        inputs=[current_samples_data_state],
         
     | 
| 1447 | 
         
            -
                        outputs=[ 
     | 
| 1448 | 
         
             
                    )
         
     | 
| 1449 | 
         | 
| 1450 | 
         
             
                    # 添加样本编号的事件处理
         
     | 
| 1451 | 
         
             
                    sample_number_input.change(
         
     | 
| 1452 | 
         
             
                        fn=handle_sample_select,
         
     | 
| 1453 | 
         
             
                        inputs=[sample_number_input, current_samples_data_state],
         
     | 
| 1454 | 
         
            -
                        outputs=[ 
     | 
| 1455 | 
         
             
                    )
         
     | 
| 1456 | 
         | 
| 1457 | 
         
             
                    sample_number_input.input(
         
     | 
| 1458 | 
         
             
                        fn=handle_sample_select,
         
     | 
| 1459 | 
         
             
                        inputs=[sample_number_input, current_samples_data_state],
         
     | 
| 1460 | 
         
            -
                        outputs=[ 
     | 
| 1461 | 
         
             
                    )
         
     | 
| 1462 | 
         | 
| 1463 | 
         
             
                    # 修改model_dropdown.change处理函数,以重新查询当前问题响应
         
     | 
| 
         @@ -1470,17 +1531,17 @@ def create_ui(db_path): 
     | 
|
| 1470 | 
         
             
                            problem_content, answer_content, samples_grid_html, new_samples_data = handle_problem_select(current_problem_id, new_model_state, current_dataset)
         
     | 
| 1471 | 
         | 
| 1472 | 
         
             
                            # 获取第一个样本的内容
         
     | 
| 1473 | 
         
            -
                             
     | 
| 1474 | 
         | 
| 1475 | 
         
            -
                            return stats_df, grid_html, new_model_state, problem_content, answer_content, samples_grid_html, new_samples_data,  
     | 
| 1476 | 
         
             
                        else:
         
     | 
| 1477 | 
         
             
                            # 没有问题ID,只返回更新的模型状态
         
     | 
| 1478 | 
         
            -
                            return stats_df, grid_html, new_model_state, "Please fill in all the fields.", "No answer available.", "", gr.State([]), "<div>Select a problem first to view samples.</div>"
         
     | 
| 1479 | 
         | 
| 1480 | 
         
             
                    model_dropdown.change(
         
     | 
| 1481 | 
         
             
                        fn=update_model_and_requery_problem,
         
     | 
| 1482 | 
         
             
                        inputs=[model_dropdown, current_dataset_state, problem_state_input], 
         
     | 
| 1483 | 
         
            -
                        outputs=[model_stats_df, problem_grid_html_output, current_model_state, problem_markdown_output, answer_markdown_output, samples_grid_output, current_samples_data_state,  
     | 
| 1484 | 
         
             
                    ).then(
         
     | 
| 1485 | 
         
             
                        # 重置Sample Number为0
         
     | 
| 1486 | 
         
             
                        fn=lambda: "0",
         
     | 
| 
         @@ -1501,20 +1562,17 @@ def monitor_memory_usage(): 
     | 
|
| 1501 | 
         | 
| 1502 | 
         
             
                    # 如果内存使用超过12GB (激进设置),清理缓存
         
     | 
| 1503 | 
         
             
                    if memory_usage_mb > 12000:  # 12GB
         
     | 
| 1504 | 
         
            -
                        print(f"Memory usage high ({memory_usage_mb:.1f} MB), clearing caches...")
         
     | 
| 1505 | 
         
             
                        if db:
         
     | 
| 1506 | 
         
             
                            db.clear_cache('response')  # 优先清理响应缓存
         
     | 
| 1507 | 
         
             
                            gc.collect()
         
     | 
| 1508 | 
         
             
                    # 如果内存使用超过14GB,更激进地清理
         
     | 
| 1509 | 
         
             
                    if memory_usage_mb > 14000:  # 14GB
         
     | 
| 1510 | 
         
            -
                        print(f"Memory usage critical ({memory_usage_mb:.1f} MB), clearing all caches...")
         
     | 
| 1511 | 
         
             
                        if db:
         
     | 
| 1512 | 
         
             
                            db.clear_cache()  # 清理所有缓存
         
     | 
| 1513 | 
         
             
                            gc.collect()
         
     | 
| 1514 | 
         | 
| 1515 | 
         
             
                    return f"Memory: {memory_usage_mb:.1f} MB"
         
     | 
| 1516 | 
         
             
                except Exception as e:
         
     | 
| 1517 | 
         
            -
                    print(f"Error monitoring memory: {e}")
         
     | 
| 1518 | 
         
             
                    return "Memory monitor error"
         
     | 
| 1519 | 
         | 
| 1520 | 
         
             
            # 修改主函数以使用优化策略
         
     | 
| 
         @@ -1523,7 +1581,6 @@ if __name__ == "__main__": 
     | 
|
| 1523 | 
         | 
| 1524 | 
         
             
                # 检查数据库文件是否存在,如果不存在则从 Hugging Face 下载
         
     | 
| 1525 | 
         
             
                if not os.path.exists(DB_PATH):
         
     | 
| 1526 | 
         
            -
                    print(f"Database file not found at {DB_PATH}. Attempting to download from Hugging Face...")
         
     | 
| 1527 | 
         
             
                    try:
         
     | 
| 1528 | 
         
             
                        # 从环境变量获取 HF_TOKEN
         
     | 
| 1529 | 
         
             
                        hf_token = os.environ.get("HF_TOKEN")
         
     | 
| 
         @@ -1537,9 +1594,7 @@ if __name__ == "__main__": 
     | 
|
| 1537 | 
         
             
                            repo_type="dataset",
         
     | 
| 1538 | 
         
             
                            token=hf_token
         
     | 
| 1539 | 
         
             
                        )
         
     | 
| 1540 | 
         
            -
                        print(f"Successfully downloaded database file to {DB_PATH}")
         
     | 
| 1541 | 
         
             
                    except Exception as e:
         
     | 
| 1542 | 
         
            -
                        print(f"Error downloading database: {str(e)}")
         
     | 
| 1543 | 
         
             
                        # 创建一个显示错误信息的简单 Gradio 应用
         
     | 
| 1544 | 
         
             
                        with gr.Blocks() as error_demo:
         
     | 
| 1545 | 
         
             
                            gr.Markdown(f"# Error: Database Download Failed\n{str(e)}\nPlease ensure HF_TOKEN is set correctly and try again.")
         
     | 
| 
         @@ -1547,19 +1602,13 @@ if __name__ == "__main__": 
     | 
|
| 1547 | 
         
             
                        exit(1)
         
     | 
| 1548 | 
         | 
| 1549 | 
         
             
                if os.path.exists(DB_PATH):
         
     | 
| 1550 | 
         
            -
                    # 报告数据库大小
         
     | 
| 1551 | 
         
            -
                    db_size = os.path.getsize(DB_PATH)
         
     | 
| 1552 | 
         
            -
                    print(f"Database size: {db_size / (1024*1024*1024):.2f} GB")
         
     | 
| 1553 | 
         
            -
                    
         
     | 
| 1554 | 
         
             
                    # 创建UI并启动
         
     | 
| 1555 | 
         
            -
                    print("Initializing database connection...")
         
     | 
| 1556 | 
         
             
                    db = ModelDatabase(DB_PATH)
         
     | 
| 1557 | 
         | 
| 1558 | 
         
             
                    # 添加清理函数
         
     | 
| 1559 | 
         
             
                    def cleanup():
         
     | 
| 1560 | 
         
             
                        global db
         
     | 
| 1561 | 
         
             
                        if db:
         
     | 
| 1562 | 
         
            -
                            print("Shutting down, cleaning up resources...")
         
     | 
| 1563 | 
         
             
                            db.close()
         
     | 
| 1564 | 
         | 
| 1565 | 
         
             
                    # 注册清理函数
         
     | 
| 
         @@ -1576,8 +1625,7 @@ if __name__ == "__main__": 
     | 
|
| 1576 | 
         
             
                        inbrowser=False
         
     | 
| 1577 | 
         
             
                    )
         
     | 
| 1578 | 
         
             
                else:
         
     | 
| 1579 | 
         
            -
                     
     | 
| 1580 | 
         
            -
                    # Optionally, create a dummy DB or a message App
         
     | 
| 1581 | 
         
             
                    with gr.Blocks() as error_demo:
         
     | 
| 1582 | 
         
             
                        gr.Markdown(f"# Error: Database Not Found\nCould not find `{DB_PATH}`. Please ensure the database file is correctly placed and accessible.")
         
     | 
| 1583 | 
         
             
                    error_demo.launch(server_name="0.0.0.0")
         
     | 
| 
         | 
|
| 11 | 
         
             
            import math
         
     | 
| 12 | 
         
             
            import time
         
     | 
| 13 | 
         
             
            from huggingface_hub import hf_hub_download
         
     | 
| 14 | 
         
            +
            import psutil
         
     | 
| 15 | 
         
            +
            import gc
         
     | 
| 16 | 
         | 
| 17 | 
         
             
            # 翻译表
         
     | 
| 18 | 
         
             
            SUBJECT_TRANS = {
         
     | 
| 
         | 
|
| 32 | 
         
             
                "still-3-1.5b-preview": "STILL-3-1.5B-Preview",
         
     | 
| 33 | 
         
             
                "deepseek-r1-distill-qwen-32b": "DeepSeek-R1-Distill-Qwen-32B",
         
     | 
| 34 | 
         
             
                "light-r1-7b-ds": "Light-R1-7B-DS",
         
     | 
| 35 | 
         
            +
                "openmath-nemotron-32b": "OpenMath-Nemotron-32B",
         
     | 
| 36 | 
         
            +
                "qwen3-235b-a22b": "Qwen3-235B-A22B",
         
     | 
| 37 | 
         
             
                "skywork-or1-32b-preview": "Skywork-OR1-32B-Preview",
         
     | 
| 38 | 
         
             
                "deepscaler-1.5b-preview": "DeepScaler-1.5B-Preview",
         
     | 
| 39 | 
         
             
                "deepseek-r1-distill-qwen-7b": "DeepSeek-R1-Distill-Qwen-7B",
         
     | 
| 
         | 
|
| 46 | 
         
             
                "skywork-or1-math-7b": "Skywork-OR1-Math-7B",
         
     | 
| 47 | 
         
             
                "skywork-or1-7b-preview": "Skywork-OR1-7B-Preview",
         
     | 
| 48 | 
         
             
                "qwen3-30b-a3b": "Qwen3-30B-A3B",
         
     | 
| 49 | 
         
            +
                "deepseek-r1": "DeepSeek-R1",
         
     | 
| 50 | 
         
            +
                "glm-z1-air": "GLM-Z1-Air",
         
     | 
| 51 | 
         
            +
                "gemini-2.5-pro-exp-03-25": "Gemini 2.5 Pro Exp 0325",
         
     | 
| 52 | 
         
            +
                "o3-mini-high": "OpenAI o3-mini (high)",
         
     | 
| 53 | 
         
            +
                "qwen3-0.6b": "Qwen3-0.6B"
         
     | 
| 54 | 
         
             
                # 添加更多模型映射
         
     | 
| 55 | 
         
             
            }
         
     | 
| 56 | 
         | 
| 
         | 
|
| 79 | 
         
             
                    self.conn.execute("PRAGMA temp_store = MEMORY")  # 临时表存储在内存中
         
     | 
| 80 | 
         
             
                    self.conn.execute("PRAGMA mmap_size = 8589934592")  # 尝试使用8GB内存映射
         
     | 
| 81 | 
         
             
                    self.conn.row_factory = sqlite3.Row
         
     | 
| 
         | 
|
| 82 | 
         | 
| 83 | 
         
             
                    # 创建索引以加速查询
         
     | 
| 84 | 
         
             
                    self._ensure_indices()
         
     | 
| 
         | 
|
| 102 | 
         
             
                        cursor.execute("CREATE INDEX IF NOT EXISTS idx_problems_unique_id ON problems(unique_id)")
         
     | 
| 103 | 
         
             
                        cursor.execute("ANALYZE")  # 分析表以优化查询计划
         
     | 
| 104 | 
         
             
                    except Exception as e:
         
     | 
| 105 | 
         
            +
                        pass
         
     | 
| 106 | 
         | 
| 107 | 
         
             
                def get_available_models(self):
         
     | 
| 108 | 
         
             
                    """Get list of all available models"""
         
     | 
| 
         | 
|
| 116 | 
         
             
                        models = [row['model_name'] for row in cursor.fetchall()]
         
     | 
| 117 | 
         
             
                        self._models_cache = models  # 存储到实例缓存
         
     | 
| 118 | 
         
             
                        return models
         
     | 
| 119 | 
         
            +
                    except sqlite3.OperationalError:
         
     | 
| 
         | 
|
| 120 | 
         
             
                        return []
         
     | 
| 121 | 
         | 
| 122 | 
         
             
                def get_available_datasets(self):
         
     | 
| 
         | 
|
| 131 | 
         
             
                        datasets = [row['dataset'].upper() for row in cursor.fetchall()]
         
     | 
| 132 | 
         
             
                        self._datasets_cache = datasets  # 存储到实例缓存
         
     | 
| 133 | 
         
             
                        return datasets
         
     | 
| 134 | 
         
            +
                    except sqlite3.OperationalError:
         
     | 
| 
         | 
|
| 135 | 
         
             
                        return DATASETS
         
     | 
| 136 | 
         | 
| 137 | 
         
             
                def get_model_statistics(self, model_name, dataset):
         
     | 
| 
         | 
|
| 177 | 
         | 
| 178 | 
         
             
                        self._cache[cache_key] = stats_data
         
     | 
| 179 | 
         
             
                        return stats_data
         
     | 
| 180 | 
         
            +
                    except sqlite3.OperationalError:
         
     | 
| 
         | 
|
| 181 | 
         
             
                        return [["Database Error", "No data available"]]
         
     | 
| 182 | 
         | 
| 183 | 
         
             
                def get_all_model_accuracies(self, dataset):
         
     | 
| 
         | 
|
| 197 | 
         
             
                        results = [(row['model_name'], row['accuracy']) for row in cursor.fetchall()]
         
     | 
| 198 | 
         
             
                        self._cache[cache_key] = results
         
     | 
| 199 | 
         
             
                        return results
         
     | 
| 200 | 
         
            +
                    except sqlite3.OperationalError:
         
     | 
| 
         | 
|
| 201 | 
         
             
                        return []
         
     | 
| 202 | 
         | 
| 203 | 
         
             
                def get_problems_by_model_dataset(self, model_name, dataset):
         
     | 
| 
         | 
|
| 224 | 
         
             
                        sorted_results = sorted(results, key=lambda x: int(re.search(r'\d+', x[0]).group(0)) if re.search(r'\d+', x[0]) else 0)
         
     | 
| 225 | 
         
             
                        self._cache[cache_key] = sorted_results
         
     | 
| 226 | 
         
             
                        return sorted_results
         
     | 
| 227 | 
         
            +
                    except sqlite3.OperationalError:
         
     | 
| 
         | 
|
| 228 | 
         
             
                        return []
         
     | 
| 229 | 
         | 
| 230 | 
         
             
                def get_problem_data(self, model_name, dataset, problem_id):
         
     | 
| 
         | 
|
| 249 | 
         
             
                                # 转为字典存储,避免SQLite连接依赖
         
     | 
| 250 | 
         
             
                                self._problem_cache[problem_cache_key] = dict(problem)
         
     | 
| 251 | 
         
             
                                problem = self._problem_cache[problem_cache_key]
         
     | 
| 252 | 
         
            +
                        except Exception:
         
     | 
| 
         | 
|
| 253 | 
         
             
                            return None, None
         
     | 
| 254 | 
         | 
| 255 | 
         
             
                    if not problem:
         
     | 
| 
         | 
|
| 279 | 
         
             
                                responses = [dict(r) for r in responses]
         
     | 
| 280 | 
         
             
                                self._response_cache[resp_cache_key] = responses
         
     | 
| 281 | 
         
             
                            return problem, responses
         
     | 
| 282 | 
         
            +
                        except Exception:
         
     | 
| 
         | 
|
| 283 | 
         
             
                            return problem, None
         
     | 
| 284 | 
         
             
                    else:
         
     | 
| 285 | 
         
             
                        # 获取所有模型对此问题的响应
         
     | 
| 
         | 
|
| 304 | 
         
             
                                responses = [dict(r) for r in responses]
         
     | 
| 305 | 
         
             
                                self._response_cache[resp_cache_key] = responses
         
     | 
| 306 | 
         
             
                            return problem, responses
         
     | 
| 307 | 
         
            +
                        except Exception:
         
     | 
| 
         | 
|
| 308 | 
         
             
                            return problem, None
         
     | 
| 309 | 
         | 
| 310 | 
         
             
                def get_model_responses(self, selected_models, dataset, problem_id):
         
     | 
| 
         | 
|
| 339 | 
         
             
                    """清除指定部分或全部缓存"""
         
     | 
| 340 | 
         
             
                    if section == 'main' or section is None:
         
     | 
| 341 | 
         
             
                        self._cache = {}
         
     | 
| 
         | 
|
| 342 | 
         
             
                    if section == 'problem' or section is None:
         
     | 
| 343 | 
         
             
                        self._problem_cache = {}
         
     | 
| 
         | 
|
| 344 | 
         
             
                    if section == 'response' or section is None:
         
     | 
| 345 | 
         
             
                        self._response_cache = {}
         
     | 
| 
         | 
|
| 346 | 
         
             
                    if section == 'models' or section is None:
         
     | 
| 347 | 
         
             
                        if hasattr(self, '_models_cache'):
         
     | 
| 348 | 
         
             
                            self._models_cache = None
         
     | 
| 349 | 
         
             
                        if hasattr(self, '_datasets_cache'):
         
     | 
| 350 | 
         
             
                            self._datasets_cache = None
         
     | 
| 
         | 
|
| 351 | 
         | 
| 352 | 
         
             
                def close(self):
                    """Close the database connection, then clear the caches.

                    Closing is best effort: any error raised by the connection's
                    ``close()`` is ignored. ``clear_cache()`` is called afterwards
                    whether or not a connection was present.
                    """
                    connection = getattr(self, 'conn', None)
                    if connection:
                        try:
                            connection.close()
                        except Exception:
                            # A failing close must not prevent the cache cleanup below.
                            pass

                    # Clear all caches.
                    self.clear_cache()
         
     | 
| 362 | 
         | 
| 363 | 
         
             
            def format_latex(text):
                """Wrap *text* in a ``math-inline`` span intended for KaTeX rendering.

                ``None`` yields an empty string. Newlines are turned into ``<br>``
                tags; everything else, LaTeX backslashes included, is left untouched.
                """
                if text is None:
                    return ""
                # Only line breaks are converted; the LaTeX source must stay intact.
                with_breaks = '<br>'.join(text.split('\n'))
                return '<span class="math-inline">' + with_breaks + '</span>'
         
     | 
| 371 | 
         
            +
             
     | 
| 372 | 
         
            +
            def format_markdown_with_math(text):
                """Normalize line endings in *text* before it is rendered as Markdown.

                No HTML is added and LaTeX is not touched; math rendering is left to
                the Markdown component. ``None`` yields an empty string.
                """
                if text is None:
                    return ""
                # Turn CRLF pairs and lone carriage returns into plain newlines.
                return re.sub(r'\r\n?', '\n', text)
         
     | 
| 383 | 
         | 
| 384 | 
         
             
            def get_gradient_color(accuracy, color_map='RdYlGn'):
         
     | 
| 
         | 
|
| 386 | 
         
             
                    return "#505050" # Default for missing or invalid accuracy
         
     | 
| 387 | 
         
             
                try:
         
     | 
| 388 | 
         
             
                    # 使用更深的颜色映射
         
     | 
| 389 | 
         
            +
                    cmap = plt.colormaps.get_cmap(color_map)
         
     | 
| 390 | 
         
             
                    rgba = cmap(float(accuracy))
         
     | 
| 391 | 
         | 
| 392 | 
         
             
                    # 确保颜色足够深以与白色文本形成对比
         
     | 
| 
         | 
|
| 399 | 
         
             
                    # 转回十六进制
         
     | 
| 400 | 
         
             
                    hex_color = mpl.colors.rgb2hex((r, g, b, a))
         
     | 
| 401 | 
         
             
                    return hex_color
         
     | 
| 402 | 
         
            +
                except Exception:
         
     | 
| 
         | 
|
| 403 | 
         
             
                    return "#505050"
         
     | 
| 404 | 
         | 
| 405 | 
         
             
            def get_contrasting_text_color(bg_color):
         
     | 
| 
         | 
|
| 432 | 
         
             
                # 其他颜色根据亮度决定
         
     | 
| 433 | 
         
             
                return "#000" if yiq > 160 else "#fff"
         
     | 
| 434 | 
         | 
| 435 | 
         
            +
            def format_sample_metadata(sample, show_correctness=True):
                """Build the HTML metadata banner for a single sample.

                Args:
                    sample: Mapping-like object (anything exposing ``keys()``). The keys
                        read are ``extracted``, ``correctness``, ``output_tokens`` and
                        ``reasoning_tokens``; all of them are optional.
                    show_correctness: When true, an info row is emitted holding the
                        correctness label, the extracted answer and the token counts.
                        When false only the empty outer container is returned.

                Returns:
                    ``""`` when *sample* is ``None``, ``"No sample data"`` when it is
                    empty or not mapping-like, otherwise the HTML string.
                """
                if sample is None:
                    return ""
                # Every mapping (plain dicts included) exposes keys(), so one check is enough.
                sample_dict = dict(sample) if hasattr(sample, 'keys') else {}
                if not sample_dict:
                    return "No sample data"

                # Pull out the fields shown in the banner.
                extracted = sample_dict.get('extracted', '')
                correctness = sample_dict.get('correctness', 0)
                output_tokens = sample_dict.get('output_tokens', None)
                reasoning_tokens = sample_dict.get('reasoning_tokens', None)

                # Fragments are collected in a list and joined once at the end.
                fragments = [
                    "<div style='font-size: 0.85em; padding: 10px; border-radius: 8px; margin-bottom: 5px;' class='dark-mode-compatible dark-mode-bg-secondary'>"
                ]

                if show_correctness:
                    if correctness:
                        correctness_label = "✓ Correct"
                        correctness_color = "var(--color-green)"
                    else:
                        correctness_label = "✗ Incorrect"
                        correctness_color = "var(--color-red)"

                    fragments.append("<div style='display: flex; flex-wrap: wrap; align-items: center; margin-bottom: 5px;'>")
                    fragments.append(
                        f"<span style='color: {correctness_color}; font-weight: bold; margin-right: 10px;'>{correctness_label}</span>"
                    )

                    # NOTE(review): the values below are interpolated without HTML
                    # escaping; confirm how the consuming component renders them
                    # before adding escaping, since it may affect math rendering.
                    if extracted:
                        # The extracted answer is wrapped in $...$ so it renders as math.
                        fragments.append(
                            f"<span style='background-color: rgba(0,0,0,0.05); padding: 2px 5px; border-radius: 3px; margin-right: 10px;'><b>Extracted:</b> ${extracted}$</span>"
                        )
                    if output_tokens is not None:
                        fragments.append(
                            f"<span style='background-color: rgba(0,0,0,0.05); padding: 2px 5px; border-radius: 3px; margin-right: 10px;'><b>Output Tokens:</b> {output_tokens}</span>"
                        )
                    # Reasoning token count is shown only when available.
                    if reasoning_tokens is not None:
                        fragments.append(
                            f"<span style='background-color: rgba(0,0,0,0.05); padding: 2px 5px; border-radius: 3px;'><b>Reasoning Tokens:</b> {reasoning_tokens}</span>"
                        )

                    fragments.append("</div>")

                fragments.append("</div>")
                return "".join(fragments)
         
     | 
| 476 | 
         | 
| 477 | 
         
            +
            def format_sample_response(sample):
                """Return a sample's response text prepared for Markdown display.

                The ``<think>``, ``<reasoning>`` and ``<answer>`` tags (opening and
                closing) are rewritten with HTML entities so they are shown literally
                instead of being parsed as HTML elements.

                Args:
                    sample: Mapping-like object (anything exposing ``keys()``) whose
                        ``response`` entry holds the text.

                Returns:
                    ``""`` when *sample* is ``None``, ``"No sample data"`` when it is
                    empty or not mapping-like, otherwise the escaped response text.
                """
                if sample is None:
                    return ""
                # Every mapping (plain dicts included) exposes keys(), so one check is enough.
                sample_dict = dict(sample) if hasattr(sample, 'keys') else {}
                if not sample_dict:
                    return "No sample data"

                response = sample_dict.get('response', '')
                # A missing/None response would otherwise crash on .replace() below.
                if response is None:
                    response = ''
                elif not isinstance(response, str):
                    response = str(response)

                # Escape the special tags so they are displayed rather than parsed.
                # The previous replacements mapped each tag onto itself and had no effect.
                for tag in ("think", "reasoning", "answer"):
                    response = response.replace(f"<{tag}>", f"&lt;{tag}&gt;")
                    response = response.replace(f"</{tag}>", f"&lt;/{tag}&gt;")

                return response
         
     | 
| 498 | 
         | 
| 499 | 
         
            +
            def handle_sample_select(sample_number, samples_data):
                """Render the metadata and response of the sample at a given index.

                Args:
                    sample_number: Index of the sample to show; anything ``int()``
                        accepts.
                    samples_data: List of samples, or an object whose ``value``
                        attribute holds that list (e.g. a Gradio State).

                Returns:
                    A ``(metadata, response)`` tuple. On any failure the first element
                    is a user-facing message and the second is ``""``.
                """
                # Unwrap a State-like object into the underlying list.
                samples_list = getattr(samples_data, 'value', samples_data)

                # The index must be an integer. None (e.g. an empty numeric field)
                # raises TypeError and infinities raise OverflowError, so those are
                # reported the same way as unparsable text.
                try:
                    sample_idx = int(sample_number)
                except (TypeError, ValueError, OverflowError):
                    return "Error: Sample number must be an integer.", ""

                # The sample data must be a non-empty list.
                if not samples_list or not isinstance(samples_list, list):
                    return "No sample data available. Please select a problem first.", ""

                # Reject indexes outside the valid range instead of wrapping around.
                if not 0 <= sample_idx < len(samples_list):
                    err_msg = f"**Error:** Sample number {sample_idx} is out of range. Valid range is 0 to {len(samples_list) - 1}."
                    return err_msg, ""

                # Format the selected sample.
                try:
                    sample = samples_list[sample_idx]
                    formatted_metadata = format_sample_metadata(sample)
                    formatted_response = format_sample_response(sample)
                    return formatted_metadata, formatted_response
                except Exception as e:
                    err_msg = f"**Error displaying sample {sample_idx}:** {str(e)}"
                    return err_msg, ""
         
     | 
| 534 | 
         | 
| 535 | 
         
            +
            def handle_first_sample(samples_data):
                """Render the metadata and response of the first sample (index 0).

                *samples_data* is either the list of samples or an object whose
                ``value`` attribute holds it. Returns a ``(metadata, response)``
                tuple; on failure the first element is a message and the second
                is ``""``.
                """
                # Unwrap a State-like object into the underlying list.
                samples_list = getattr(samples_data, 'value', samples_data)

                # Nothing to show unless there is a non-empty list of samples.
                if not samples_list or not isinstance(samples_list, list):
                    return "No sample data available. Please select the problem and dataset first.", ""

                # Index 0 is known to exist here, so no range validation is needed.
                try:
                    first = samples_list[0]
                    metadata_html = format_sample_metadata(first)
                    response_md = format_sample_response(first)
                except Exception as e:
                    return f"**Error displaying first sample:** {str(e)}", ""
                return metadata_html, response_md
         
     | 
| 556 | 
         | 
| 557 | 
         
            +
            def handle_comparison_problem_update(problem_id, dataset_state):
                """Load the problem text and answer for the comparison page.

                Only the problem and its answer are refreshed; no model is required.

                Args:
                    problem_id: Full problem id or a bare number, given directly or as
                        an object with a ``value`` attribute. A bare number is expanded
                        to ``OlymMATH-<difficulty>-<number>-<language>`` when the
                        dataset name has the two-part ``<language>-<difficulty>`` form.
                    dataset_state: Dataset name, given directly or as an object with a
                        ``value`` attribute.

                Returns:
                    A ``(problem_markdown, answer_markdown)`` tuple. On failure the
                    first element is a user-facing message and the second is
                    ``"No answer available."``.
                """
                # Unwrap State-like objects into their plain values.
                dataset_name = getattr(dataset_state, 'value', dataset_state)
                problem_id_value = getattr(problem_id, 'value', problem_id)

                # Work on text: a non-string id (e.g. an int) has no isdigit(), and
                # surrounding whitespace would defeat the digit check below.
                if problem_id_value is not None:
                    problem_id_value = str(problem_id_value).strip()

                if not problem_id_value or not dataset_name:
                    return "Please select a dataset and enter a problem ID.", "No answer available."

                # A purely numeric input is expanded into the full unique id.
                if problem_id_value.isdigit():
                    # Target format: OlymMATH-HARD-0-EN or similar.
                    parts = dataset_name.split('-')
                    if len(parts) == 2:  # expected dataset form, e.g. "EN-HARD"
                        language, difficulty = parts
                        problem_id_value = f"OlymMATH-{difficulty}-{problem_id_value}-{language}"

                try:
                    # Fetch the problem only; no model-specific responses are needed.
                    problem_data, _ = db.get_problem_data(None, dataset_name, problem_id_value)

                    if not problem_data:
                        return f"Problem not found: {problem_id_value}. Please check the ID and try again.", "No answer available."

                    problem_dict = dict(problem_data)
                    problem_content = format_markdown_with_math(problem_dict.get('problem', ''))

                    # A None answer is treated as empty and other non-string values
                    # are stringified, so the regex below never sees a non-string.
                    answer_text = problem_dict.get('answer', '')
                    answer_text = '' if answer_text is None else str(answer_text)

                    # Collapse $$...$$ into $...$; DOTALL lets the match span lines.
                    answer_text = re.sub(r'\$\$(.*?)\$\$', r'$\1$', answer_text, flags=re.DOTALL)

                    # Wrap a non-empty answer in $...$ when it has no dollar sign yet.
                    if '$' not in answer_text and answer_text.strip():
                        answer_text = f"${answer_text}$"

                    answer_content = format_markdown_with_math(answer_text)
                    return problem_content, answer_content
                except Exception as e:
                    return f"Error: {str(e)}", "No answer available."
         
     | 
| 601 | 
         | 
| 602 | 
         
             
            def handle_problem_select(problem_id_from_js, current_model_state, current_dataset_state, mode='default'):
         
     | 
| 603 | 
         
             
                global db
         
     | 
| 
         | 
|
| 606 | 
         
             
                dataset_name = current_dataset_state.value if hasattr(current_dataset_state, 'value') else current_dataset_state
         
     | 
| 607 | 
         
             
                problem_id = problem_id_from_js.value if hasattr(problem_id_from_js, 'value') else problem_id_from_js
         
     | 
| 608 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 609 | 
         
             
                # 处理纯数字输入,构建完整unique_id
         
     | 
| 610 | 
         
             
                if problem_id and problem_id.isdigit():
         
     | 
| 611 | 
         
             
                    # 构建格式:OlymMATH-HARD-0-EN 或类似格式
         
     | 
| 
         | 
|
| 615 | 
         
             
                        language, difficulty = parts
         
     | 
| 616 | 
         
             
                        # 构建完整ID
         
     | 
| 617 | 
         
             
                        problem_id = f"OlymMATH-{difficulty}-{problem_id}-{language}"
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 618 | 
         | 
| 619 | 
         
             
                if not problem_id or not dataset_name:
         
     | 
| 620 | 
         
             
                    error_message = f"Missing data: problem_id='{problem_id}', dataset='{dataset_name}'"
         
     | 
| 
         | 
|
| 621 | 
         
             
                    return "Please fill in all the fields.", "No answer available.", "", gr.State([])
         
     | 
| 622 | 
         | 
| 623 | 
         
             
                # For comparison mode, we might not have a model selected yet
         
     | 
| 
         | 
|
| 628 | 
         | 
| 629 | 
         
             
                        if not problem_data:
         
     | 
| 630 | 
         
             
                            error_message = f"Problem data not found: problem_id='{problem_id}', dataset='{dataset_name}'"
         
     | 
| 
         | 
|
| 631 | 
         
             
                            return f"Problem not found: {problem_id}. Please check the ID and try again.", "No answer available.", "", gr.State([])
         
     | 
| 632 | 
         | 
| 633 | 
         
             
                        problem_dict = dict(problem_data)
         
     | 
| 634 | 
         
            +
                        # Process problem and answer text for Markdown rendering
         
     | 
| 635 | 
         
            +
                        problem_content = format_markdown_with_math(problem_dict.get('problem', ''))
         
     | 
| 636 | 
         
            +
                        
         
     | 
| 637 | 
         
            +
                        # 将答案中的双美元符号替换为单美元符号
         
     | 
| 638 | 
         
            +
                        answer_text = problem_dict.get('answer', '')
         
     | 
| 639 | 
         
            +
                        # 先将$$...$$替换为单个$...$,使用re.DOTALL处理多行
         
     | 
| 640 | 
         
            +
                        answer_text = re.sub(r'\$\$(.*?)\$\$', r'$\1$', answer_text, flags=re.DOTALL)
         
     | 
| 641 | 
         
            +
                        
         
     | 
| 642 | 
         
            +
                        # 检查答案是否已经包含美元符号,如果没有则添加
         
     | 
| 643 | 
         
            +
                        if '$' not in answer_text and answer_text.strip():
         
     | 
| 644 | 
         
            +
                            answer_text = f"${answer_text}$"
         
     | 
| 645 | 
         
            +
                            
         
     | 
| 646 | 
         
            +
                        answer_content = format_markdown_with_math(answer_text)
         
     | 
| 647 | 
         | 
| 648 | 
         
             
                        # For comparison without model, we don't have samples to display
         
     | 
| 649 | 
         
             
                        return problem_content, answer_content, "", gr.State([])
         
     | 
| 650 | 
         
             
                    except Exception as e:
         
     | 
| 651 | 
         
             
                        error_message = f"Database error: {str(e)}"
         
     | 
| 
         | 
|
| 652 | 
         
             
                        return f"Database error occurred. Please try again.", "No answer available.", "", gr.State([])
         
     | 
| 653 | 
         | 
| 654 | 
         
             
                # The regular flow for model-specific data
         
     | 
| 655 | 
         
             
                if not model_name:
         
     | 
| 656 | 
         
             
                    error_message = f"Missing data: model='{model_name}'"
         
     | 
| 
         | 
|
| 657 | 
         
             
                    return "Please fill in all the fields.", "No answer available.", "", gr.State([])
         
     | 
| 658 | 
         | 
| 659 | 
         
             
                # The problem_id from JS should be the full unique_id. No reconstruction needed normally.
         
     | 
| 
         | 
|
| 662 | 
         | 
| 663 | 
         
             
                    if not problem_data:
         
     | 
| 664 | 
         
             
                        error_message = f"Problem data not found: problem_id='{problem_id}', model='{model_name}', dataset='{dataset_name}'"
         
     | 
| 
         | 
|
| 665 | 
         
             
                        return f"Problem not found: {problem_id}. Please check the ID and try again.", "No answer available.", "", gr.State([])
         
     | 
| 666 | 
         
             
                except Exception as e:
         
     | 
| 667 | 
         
             
                    error_message = f"Database error: {str(e)}"
         
     | 
| 
         | 
|
| 668 | 
         
             
                    return f"Database error occurred. Please try again.", "No answer available.", "", gr.State([])
         
     | 
| 669 | 
         | 
| 670 | 
         
             
                problem_dict = dict(problem_data)
         
     | 
| 671 | 
         
             
                problem_display_num = re.search(r'\d+', problem_id).group(0) if re.search(r'\d+', problem_id) else problem_id
         
     | 
| 672 | 
         | 
| 673 | 
         
            +
                # Process problem and answer text for Markdown rendering
         
     | 
| 674 | 
         
            +
                problem_content = format_markdown_with_math(problem_dict.get('problem', ''))
         
     | 
| 675 | 
         
            +
                
         
     | 
| 676 | 
         
            +
                # 将答案中的双美元符号替换为单美元符号
         
     | 
| 677 | 
         
            +
                answer_text = problem_dict.get('answer', '')
         
     | 
| 678 | 
         
            +
                # 先将$$...$$替换为单个$...$,使用re.DOTALL处理多行
         
     | 
| 679 | 
         
            +
                answer_text = re.sub(r'\$\$(.*?)\$\$', r'$\1$', answer_text, flags=re.DOTALL)
         
     | 
| 680 | 
         
            +
                
         
     | 
| 681 | 
         
            +
                # 检查答案是否已经包含美元符号,如果没有则添加
         
     | 
| 682 | 
         
            +
                if '$' not in answer_text and answer_text.strip():
         
     | 
| 683 | 
         
            +
                    answer_text = f"${answer_text}$"
         
     | 
| 684 | 
         
            +
                    
         
     | 
| 685 | 
         
            +
                answer_content = format_markdown_with_math(answer_text)
         
     | 
| 686 | 
         | 
| 687 | 
         
            +
                # Rest of the function remains the same
         
     | 
| 688 | 
         
             
                if not responses_data:
         
     | 
| 689 | 
         
             
                    samples_grid_html = "<div>No samples available for this problem.</div>"
         
     | 
| 690 | 
         
             
                    # 返回空的样本数据状态
         
     | 
| 
         | 
|
| 762 | 
         | 
| 763 | 
         
             
                        samples_grid_html += '</div>'
         
     | 
| 764 | 
         | 
| 765 | 
         
            +
                    # 第三行和第四行 - 允许所有模式显示完整的64个样本
         
     | 
| 766 | 
         
            +
                    if actual_display_count > 2*samples_per_row:
         
     | 
| 767 | 
         
             
                        # 第三行
         
     | 
| 768 | 
         
             
                        row_samples = displayed_samples[2*samples_per_row:3*samples_per_row]
         
     | 
| 769 | 
         
             
                        if row_samples:
         
     | 
| 
         | 
|
| 829 | 
         
             
                    </div>
         
     | 
| 830 | 
         
             
                    """
         
     | 
| 831 | 
         | 
| 
         | 
|
| 832 | 
         
             
                    # 获取第一个样本作为初始样本
         
     | 
| 833 | 
         
             
                    if samples_data:
         
     | 
| 834 | 
         
             
                        # 这样样本会在选择问题后立即显示
         
     | 
| 
         | 
|
| 835 | 
         
             
                        return problem_content, answer_content, final_html, gr.State(samples_data)
         
     | 
| 836 | 
         
             
                    else:
         
     | 
| 837 | 
         
             
                        return problem_content, answer_content, final_html, gr.State([])
         
     | 
| 838 | 
         | 
| 839 | 
         
            +
            def create_problem_grid_html(problems, mode='default'):
                """Build the HTML grid of per-problem buttons.

                Each button is a ``<div class="problem-btn">`` carrying the problem id in
                its ``data-problem-id`` attribute and showing the numeric part of the id
                together with the accuracy as a whole-number percentage. Click handling is
                not wired up here; it is expected to be provided by page-level JavaScript.

                Args:
                    problems: Iterable of indexable rows; ``row[0]`` is the problem id,
                        ``row[1]`` the accuracy as a fraction (``None`` is treated as 0.0)
                        and ``row[2]`` is read but not displayed.
                    mode: ``'comparison'`` lays the grid out in 20 columns; any other
                        value uses 10 columns.

                Returns:
                    An HTML string: the grid, a "no problems" notice when ``problems`` is
                    empty, or an error notice when the rows cannot be normalised.
                """
                # Local import so this function does not depend on the file's import block.
                from html import escape

                if not problems:
                    return "<div>No problems found for this model/dataset. Please select a model and dataset.</div>"

                def first_number(problem_id):
                    """Return the first run of digits in ``problem_id`` or ``None``."""
                    found = re.search(r'\d+', problem_id)
                    return found.group(0) if found else None

                def sort_key(row):
                    """Order rows by the number embedded in the id (0 when absent)."""
                    digits = first_number(row[0])
                    return int(digits) if digits else 0

                try:
                    sorted_problems = sorted(
                        [(str(p[0]), float(p[1]) if p[1] is not None else 0.0, p[2]) for p in problems],
                        key=sort_key,
                    )
                except Exception as e:
                    # Deliberate best-effort boundary: malformed rows must not break the UI.
                    # The message is escaped because it can echo raw row content.
                    return f"<div>Error displaying problems. Check logs. {escape(str(e))}</div>"

                # All buttons use white text; !important keeps theme CSS from overriding it.
                text_color = "#ffffff"

                buttons = []
                for pid, accuracy, _ in sorted_problems:
                    digits = first_number(pid)
                    # Escape everything derived from the id before it reaches the markup.
                    safe_pid = escape(pid, quote=True)
                    num_display = escape(digits if digits else pid, quote=True)
                    # round() rather than int(): 0.29 * 100 is 28.999999999999996 in
                    # floating point, which truncation would display as 28%.
                    acc_pct = round(accuracy * 100)
                    # Background colour comes from the accuracy gradient helper.
                    bg_color = get_gradient_color(accuracy)

                    buttons.append(
                        f'<div data-problem-id="{safe_pid}" class="problem-btn" '
                        f'title="ID: {safe_pid} - Acc: {acc_pct}%" '
                        f"style='background-color: {bg_color}; color: {text_color} !important; "
                        "border-radius: 4px; padding: 5px; text-align: center; font-size: 0.7em; "
                        "min-height: 36px; user-select: none; width: 100%; "
                        "display: flex; flex-direction: column; justify-content: center; "
                        "overflow: hidden; text-overflow: ellipsis; white-space: nowrap;'>"
                        f'<div style="font-weight: bold; color: {text_color} !important;">{num_display}</div>'
                        f'<div style="color: {text_color} !important;">{acc_pct}%</div>'
                        '</div>'
                    )
                html_buttons = "\n".join(buttons)

                # Extra stylesheet rule forcing the button text colour to white.
                custom_style = "<style>.problem-btn, .problem-btn div { color: white !important; }</style>"
                # Column count depends on the display mode.
                grid_cols = 20 if mode == 'comparison' else 10
                grid_html = f"{custom_style}<div style='display: grid; grid-template-columns: repeat({grid_cols}, 1fr); gap: 4px;'>{html_buttons}</div>"
                return grid_html
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 884 | 
         | 
| 885 | 
         
             
            def create_ui(db_path):
         
     | 
| 886 | 
         
             
                global db
         
     | 
| 
         | 
|
| 890 | 
         
             
                if not AVAILABLE_DATASETS:
         
     | 
| 891 | 
         
             
                    AVAILABLE_DATASETS = ["EN-HARD", "EN-EASY", "ZH-HARD", "ZH-EASY"] # Fallback
         
     | 
| 892 | 
         | 
| 893 | 
         
            +
                # Add MathJax support to the CSS
         
     | 
| 894 | 
         
             
                custom_css = """
         
     | 
| 895 | 
         
             
                .padding.svelte-phx28p { padding: unset !important; }
         
     | 
| 896 | 
         
             
                body, .gradio-container { font-family: sans-serif; font-size: 0.95em; line-height: 1.6; }
         
     | 
| 
         | 
|
| 898 | 
         
             
                .sample-btn:hover { transform: translateY(-1px); box-shadow: 0 2px 5px rgba(0,0,0,0.1); }
         
     | 
| 899 | 
         
             
                .problem-grid-container { overflow-y: auto; }
         
     | 
| 900 | 
         
             
                .math-content { overflow-x: auto; padding: 5px; }
         
     | 
| 901 | 
         
            +
                .sample-response { overflow-y: clip !important; max-height: none !important; height: auto !important; }
         
     | 
| 902 | 
         
             
                h1, h2, h3, h4, h5 { margin-top: 0.8em; margin-bottom: 0.4em; color: var(--color-text); }
         
     | 
| 903 | 
         
             
                .gradio-tabs > div[role='tablist'] button { font-size: 0.9em; padding: 8px 12px; }
         
     | 
| 904 | 
         
             
                .gr-dropdown select { font-size: 0.9em; }
         
     | 
| 
         | 
|
| 924 | 
         
             
                    //border-radius: 8px;
         
     | 
| 925 | 
         
             
                    //margin-top: 10px;
         
     | 
| 926 | 
         
             
                }
         
     | 
| 927 | 
         
            +
             
     | 
| 928 | 
         
            +
                /* MathJax Styles for Gradio's Built-in LaTeX */
         
     | 
| 929 | 
         
            +
                .math-inline, .math-display {
         
     | 
| 930 | 
         
            +
                    font-size: 110%;
         
     | 
| 931 | 
         
            +
                }
         
     | 
| 932 | 
         
            +
                .math-container p {
         
     | 
| 933 | 
         
            +
                    margin: 0.5em 0;
         
     | 
| 934 | 
         
            +
                }
         
     | 
| 935 | 
         
            +
             
     | 
| 936 | 
         
            +
                /* Markdown content styles */
         
     | 
| 937 | 
         
            +
                .gr-markdown strong {
         
     | 
| 938 | 
         
            +
                    font-weight: bold;
         
     | 
| 939 | 
         
            +
                }
         
     | 
| 940 | 
         
            +
                .gr-markdown em {
         
     | 
| 941 | 
         
            +
                    font-style: italic;
         
     | 
| 942 | 
         
            +
                }
         
     | 
| 943 | 
         
            +
                .gr-markdown ul, .gr-markdown ol {
         
     | 
| 944 | 
         
            +
                    padding-left: 2em;
         
     | 
| 945 | 
         
            +
                    margin: 0.5em 0;
         
     | 
| 946 | 
         
            +
                }
         
     | 
| 947 | 
         
            +
                .gr-markdown blockquote {
         
     | 
| 948 | 
         
            +
                    border-left: 3px solid #ccc;
         
     | 
| 949 | 
         
            +
                    margin: 0.5em 0;
         
     | 
| 950 | 
         
            +
                    padding-left: 1em;
         
     | 
| 951 | 
         
            +
                    color: #666;
         
     | 
| 952 | 
         
            +
                }
         
     | 
| 953 | 
         
            +
                .gr-markdown pre, .gr-markdown code {
         
     | 
| 954 | 
         
            +
                    background-color: rgba(0,0,0,0.05);
         
     | 
| 955 | 
         
            +
                    padding: 2px 4px;
         
     | 
| 956 | 
         
            +
                    border-radius: 3px;
         
     | 
| 957 | 
         
            +
                    font-family: monospace;
         
     | 
| 958 | 
         
            +
                }
         
     | 
| 959 | 
         
            +
                .gr-markdown table {
         
     | 
| 960 | 
         
            +
                    border-collapse: collapse;
         
     | 
| 961 | 
         
            +
                    margin: 0.5em 0;
         
     | 
| 962 | 
         
            +
                }
         
     | 
| 963 | 
         
            +
                .gr-markdown th, .gr-markdown td {
         
     | 
| 964 | 
         
            +
                    border: 1px solid #ddd;
         
     | 
| 965 | 
         
            +
                    padding: 4px 8px;
         
     | 
| 966 | 
         
            +
                }
         
     | 
| 967 | 
         
             
                """
         
     | 
| 968 | 
         | 
| 969 | 
         
             
                with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.sky)) as demo:
         
     | 
| 970 | 
         
            +
                    # Remove KaTeX loading script since we're using Gradio's native Markdown with LaTeX
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 971 | 
         | 
| 972 | 
         
             
                    current_dataset_state = gr.State(value=AVAILABLE_DATASETS[0] if AVAILABLE_DATASETS else "")
         
     | 
| 973 | 
         
             
                    current_model_state = gr.State(value=None)
         
     | 
| 
         | 
|
| 1032 | 
         
             
                                with gr.Column(scale=3, min_width=400):
         
     | 
| 1033 | 
         
             
                                    with gr.Tabs():
         
     | 
| 1034 | 
         
             
                                        with gr.TabItem("Problem Statement"):
         
     | 
| 1035 | 
         
            +
                                            problem_markdown_output = gr.Markdown(
         
     | 
| 1036 | 
         
            +
                                                "Please fill in all the fields.",
         
     | 
| 1037 | 
         
            +
                                                latex_delimiters=[
         
     | 
| 1038 | 
         
            +
                                                    {"left": "$", "right": "$", "display": False},
         
     | 
| 1039 | 
         
            +
                                                    {"left": "$$", "right": "$$", "display": True},
         
     | 
| 1040 | 
         
            +
                                                    {"left": "\\(", "right": "\\)", "display": False},
         
     | 
| 1041 | 
         
            +
                                                    {"left": "\\[", "right": "\\]", "display": True}
         
     | 
| 1042 | 
         
            +
                                                ]
         
     | 
| 1043 | 
         
            +
                                            )
         
     | 
| 1044 | 
         
             
                                        with gr.TabItem("Reference Answer"):
         
     | 
| 1045 | 
         
            +
                                            answer_markdown_output = gr.Markdown(
         
     | 
| 1046 | 
         
            +
                                                "No answer available.",
         
     | 
| 1047 | 
         
            +
                                                latex_delimiters=[
         
     | 
| 1048 | 
         
            +
                                                    {"left": "$", "right": "$", "display": False},
         
     | 
| 1049 | 
         
            +
                                                    {"left": "$$", "right": "$$", "display": True},
         
     | 
| 1050 | 
         
            +
                                                    {"left": "\\(", "right": "\\)", "display": False},
         
     | 
| 1051 | 
         
            +
                                                    {"left": "\\[", "right": "\\]", "display": True}
         
     | 
| 1052 | 
         
            +
                                                ]
         
     | 
| 1053 | 
         
            +
                                            )
         
     | 
| 1054 | 
         | 
| 1055 | 
         
             
                                    # 样本网格
         
     | 
| 1056 | 
         
             
                                    samples_grid_output = gr.HTML("")
         
     | 
| 
         | 
|
| 1068 | 
         
             
                                            every=0.5
         
     | 
| 1069 | 
         
             
                                        )
         
     | 
| 1070 | 
         | 
| 1071 | 
         
            +
                                    # 样本内容显示区域 - 使用HTML和Markdown组件分别显示元数据和响应内容
         
     | 
| 1072 | 
         
            +
                                    sample_metadata_output = gr.HTML(
         
     | 
| 1073 | 
         
             
                                        value="<div>Select a problem first to view samples.</div>",
         
     | 
| 1074 | 
         
            +
                                        elem_classes="sample-metadata dark-mode-bg-secondary", 
         
     | 
| 1075 | 
         
            +
                                        elem_id="sample-metadata-area"
         
     | 
| 1076 | 
         
            +
                                    )
         
     | 
| 1077 | 
         
            +
                                    
         
     | 
| 1078 | 
         
            +
                                    sample_response_output = gr.Markdown(
         
     | 
| 1079 | 
         
            +
                                        value="Select a problem first to view samples.",
         
     | 
| 1080 | 
         
            +
                                        elem_classes="sample-response dark-mode-bg-secondary", 
         
     | 
| 1081 | 
         
            +
                                        elem_id="sample-response-area",
         
     | 
| 1082 | 
         
            +
                                        latex_delimiters=[
         
     | 
| 1083 | 
         
            +
                                            {"left": "$", "right": "$", "display": False},
         
     | 
| 1084 | 
         
            +
                                            {"left": "$$", "right": "$$", "display": True},
         
     | 
| 1085 | 
         
            +
                                            {"left": "\\(", "right": "\\)", "display": False},
         
     | 
| 1086 | 
         
            +
                                            {"left": "\\[", "right": "\\]", "display": True}
         
     | 
| 1087 | 
         
            +
                                        ]
         
     | 
| 1088 | 
         
             
                                    )
         
     | 
| 1089 | 
         | 
| 1090 | 
         
             
                        with gr.TabItem("Model Comparison"):
         
     | 
| 
         | 
|
| 1112 | 
         
             
                                with gr.Column(scale=1):
         
     | 
| 1113 | 
         
             
                                    with gr.Tabs():
         
     | 
| 1114 | 
         
             
                                        with gr.TabItem("Problem Statement"):
         
     | 
| 1115 | 
         
            +
                                            comp_problem_markdown_output = gr.Markdown(
         
     | 
| 1116 | 
         
            +
                                                "Please select models and problem.",
         
     | 
| 1117 | 
         
            +
                                                latex_delimiters=[
         
     | 
| 1118 | 
         
            +
                                                    {"left": "$", "right": "$", "display": False},
         
     | 
| 1119 | 
         
            +
                                                    {"left": "$$", "right": "$$", "display": True},
         
     | 
| 1120 | 
         
            +
                                                    {"left": "\\(", "right": "\\)", "display": False},
         
     | 
| 1121 | 
         
            +
                                                    {"left": "\\[", "right": "\\]", "display": True}
         
     | 
| 1122 | 
         
            +
                                                ]
         
     | 
| 1123 | 
         
            +
                                            )
         
     | 
| 1124 | 
         
             
                                        with gr.TabItem("Reference Answer"):
         
     | 
| 1125 | 
         
            +
                                            comp_answer_markdown_output = gr.Markdown(
         
     | 
| 1126 | 
         
            +
                                                "No answer available.",
         
     | 
| 1127 | 
         
            +
                                                latex_delimiters=[
         
     | 
| 1128 | 
         
            +
                                                    {"left": "$", "right": "$", "display": False},
         
     | 
| 1129 | 
         
            +
                                                    {"left": "$$", "right": "$$", "display": True},
         
     | 
| 1130 | 
         
            +
                                                    {"left": "\\(", "right": "\\)", "display": False},
         
     | 
| 1131 | 
         
            +
                                                    {"left": "\\[", "right": "\\]", "display": True}
         
     | 
| 1132 | 
         
            +
                                                ]
         
     | 
| 1133 | 
         
            +
                                            )
         
     | 
| 1134 | 
         | 
| 1135 | 
         
             
                            # 左右两部分模型比较
         
     | 
| 1136 | 
         
             
                            with gr.Row(variant='compact'):
         
     | 
| 
         | 
|
| 1162 | 
         
             
                                            every=0.5
         
     | 
| 1163 | 
         
             
                                        )
         
     | 
| 1164 | 
         | 
| 1165 | 
         
            +
                                    # 样本内容显示区域 - 使用HTML和Markdown组件分别显示元数据和响应内容
         
     | 
| 1166 | 
         
            +
                                    comp_sample_metadata_output_left = gr.HTML(
         
     | 
| 1167 | 
         
             
                                        value="<div>Select a problem first to view samples.</div>",
         
     | 
| 1168 | 
         
            +
                                        elem_classes="sample-metadata dark-mode-bg-secondary", 
         
     | 
| 1169 | 
         
            +
                                        elem_id="comp-sample-metadata-area-left"
         
     | 
| 1170 | 
         
            +
                                    )
         
     | 
| 1171 | 
         
            +
                                    
         
     | 
| 1172 | 
         
            +
                                    comp_sample_response_output_left = gr.Markdown(
         
     | 
| 1173 | 
         
            +
                                        value="Select a problem first to view samples.",
         
     | 
| 1174 | 
         
            +
                                        elem_classes="sample-response dark-mode-bg-secondary", 
         
     | 
| 1175 | 
         
            +
                                        elem_id="comp-sample-response-area-left",
         
     | 
| 1176 | 
         
            +
                                        latex_delimiters=[
         
     | 
| 1177 | 
         
            +
                                            {"left": "$", "right": "$", "display": False},
         
     | 
| 1178 | 
         
            +
                                            {"left": "$$", "right": "$$", "display": True},
         
     | 
| 1179 | 
         
            +
                                            {"left": "\\(", "right": "\\)", "display": False},
         
     | 
| 1180 | 
         
            +
                                            {"left": "\\[", "right": "\\]", "display": True}
         
     | 
| 1181 | 
         
            +
                                        ]
         
     | 
| 1182 | 
         
             
                                    )
         
     | 
| 1183 | 
         | 
| 1184 | 
         
             
                                # 右侧模型
         
     | 
| 
         | 
|
| 1209 | 
         
             
                                            every=0.5
         
     | 
| 1210 | 
         
             
                                        )
         
     | 
| 1211 | 
         | 
| 1212 | 
         
            +
                                    # 样本内容显示区域 - 使用HTML和Markdown组件分别显示元数据和响应内容
         
     | 
| 1213 | 
         
            +
                                    comp_sample_metadata_output_right = gr.HTML(
         
     | 
| 1214 | 
         
             
                                        value="<div>Select a problem first to view samples.</div>",
         
     | 
| 1215 | 
         
            +
                                        elem_classes="sample-metadata dark-mode-bg-secondary", 
         
     | 
| 1216 | 
         
            +
                                        elem_id="comp-sample-metadata-area-right"
         
     | 
| 1217 | 
         
            +
                                    )
         
     | 
| 1218 | 
         
            +
                                    
         
     | 
| 1219 | 
         
            +
                                    comp_sample_response_output_right = gr.Markdown(
         
     | 
| 1220 | 
         
            +
                                        value="Select a problem first to view samples.",
         
     | 
| 1221 | 
         
            +
                                        elem_classes="sample-response dark-mode-bg-secondary", 
         
     | 
| 1222 | 
         
            +
                                        elem_id="comp-sample-response-area-right",
         
     | 
| 1223 | 
         
            +
                                        latex_delimiters=[
         
     | 
| 1224 | 
         
            +
                                            {"left": "$", "right": "$", "display": False},
         
     | 
| 1225 | 
         
            +
                                            {"left": "$$", "right": "$$", "display": True},
         
     | 
| 1226 | 
         
            +
                                            {"left": "\\(", "right": "\\)", "display": False},
         
     | 
| 1227 | 
         
            +
                                            {"left": "\\[", "right": "\\]", "display": True}
         
     | 
| 1228 | 
         
            +
                                        ]
         
     | 
| 1229 | 
         
             
                                    )
         
     | 
| 1230 | 
         | 
| 1231 | 
         
             
                    # --- Event Handlers --- 
         
     | 
| 
         | 
|
| 1265 | 
         
             
                               gr.Dropdown(choices=comp_model_choices if comp_model_choices else [], value=None)
         
     | 
| 1266 | 
         | 
| 1267 | 
         
             
                    def update_problem_grid_and_stats(selected_model_formatted, selected_dataset, mode='default'):
         
     | 
| 
         | 
|
| 1268 | 
         
             
                        if not selected_model_formatted or not selected_dataset:
         
     | 
| 1269 | 
         
             
                            # Return empty/default values for all outputs, including the state
         
     | 
| 1270 | 
         
             
                            return gr.DataFrame(value=[]), gr.HTML("<div>Please select a model and dataset first.</div>"), None
         
     | 
| 
         | 
|
| 1282 | 
         
             
                        problem_list = db.get_problems_by_model_dataset(model_name, selected_dataset)
         
     | 
| 1283 | 
         
             
                        grid_html = create_problem_grid_html(problem_list, mode=mode)
         
     | 
| 1284 | 
         | 
| 
         | 
|
| 1285 | 
         
             
                        # Correctly return the actual value for the current_model_state output
         
     | 
| 1286 | 
         
             
                        return gr.DataFrame(value=stats_data), gr.HTML(value=grid_html), model_name
         
     | 
| 1287 | 
         | 
| 
         | 
|
| 1300 | 
         
             
                        inputs=[],
         
     | 
| 1301 | 
         
             
                        outputs=[sample_number_input]
         
     | 
| 1302 | 
         
             
                    ).then(
         
     | 
| 1303 | 
         
            +
                        lambda: ("Please fill in all the fields.", "No answer available.", "", gr.State([]), "<div>Select a problem first to view samples.</div>", ""),
         
     | 
| 1304 | 
         
             
                        inputs=[],
         
     | 
| 1305 | 
         
            +
                        outputs=[problem_markdown_output, answer_markdown_output, samples_grid_output, current_samples_data_state, sample_metadata_output, sample_response_output]
         
     | 
| 1306 | 
         
             
                    )
         
     | 
| 1307 | 
         | 
| 1308 | 
         
             
                    # Initial population of model dropdowns based on default dataset
         
     | 
| 
         | 
|
| 1315 | 
         
             
                        inputs=[current_dataset_state],
         
     | 
| 1316 | 
         
             
                        outputs=[model_stats_df, problem_grid_html_output, current_dataset_state]
         
     | 
| 1317 | 
         
             
                    ).then(
         
     | 
| 1318 | 
         
            +
                        lambda: ("Please fill in all the fields.", "No answer available.", "", gr.State([]), "<div>Select a problem first to view samples.</div>", ""),
         
     | 
| 1319 | 
         
             
                        inputs=[],
         
     | 
| 1320 | 
         
            +
                        outputs=[problem_markdown_output, answer_markdown_output, samples_grid_output, current_samples_data_state, sample_metadata_output, sample_response_output]
         
     | 
| 1321 | 
         
             
                    ).then(
         
     | 
| 1322 | 
         
             
                        # 重置Sample Number为0
         
     | 
| 1323 | 
         
             
                        fn=lambda: "0",
         
     | 
| 
         | 
|
| 1381 | 
         
             
                            problem_content, answer_content, samples_grid_html, new_samples_data = handle_problem_select_comparison(current_problem_id, new_model_state, current_dataset)
         
     | 
| 1382 | 
         | 
| 1383 | 
         
             
                            # 获取第一个样本的内容
         
     | 
| 1384 | 
         
            +
                            first_metadata, first_response = handle_first_sample(new_samples_data)
         
     | 
| 1385 | 
         | 
| 1386 | 
         
            +
                            return grid_html, new_model_state, problem_content, answer_content, samples_grid_html, new_samples_data, first_metadata, first_response
         
     | 
| 1387 | 
         
             
                        else:
         
     | 
| 1388 | 
         
             
                            # 没有问题ID,只返回更新的模型状态
         
     | 
| 1389 | 
         
            +
                            return grid_html, new_model_state, "Please enter a problem ID.", "No answer available.", "", gr.State([]), "<div>Select a problem first to view samples.</div>", ""
         
     | 
| 1390 | 
         | 
| 1391 | 
         
             
                    # 修改model_dropdown的处理函数,以重新查询当前问题响应 - 比较页面右侧
         
     | 
| 1392 | 
         
             
                    def update_model_and_requery_problem_right(model_dropdown_value, current_dataset, current_problem_id):
         
     | 
| 
         | 
|
| 1399 | 
         
             
                            _, _, samples_grid_html, new_samples_data = handle_problem_select_comparison(current_problem_id, new_model_state, current_dataset)
         
     | 
| 1400 | 
         | 
| 1401 | 
         
             
                            # 获取第一个样本的内容
         
     | 
| 1402 | 
         
            +
                            first_metadata, first_response = handle_first_sample(new_samples_data)
         
     | 
| 1403 | 
         | 
| 1404 | 
         
            +
                            return grid_html, new_model_state, samples_grid_html, new_samples_data, first_metadata, first_response
         
     | 
| 1405 | 
         
             
                        else:
         
     | 
| 1406 | 
         
             
                            # 没有问题ID,只返回更新的模型状态
         
     | 
| 1407 | 
         
            +
                            return grid_html, new_model_state, "", gr.State([]), "<div>Select a problem first to view samples.</div>", ""
         
     | 
| 1408 | 
         | 
| 1409 | 
         
             
                    # 左侧模型选择事件
         
     | 
| 1410 | 
         
             
                    comp_model_dropdown_left.change(
         
     | 
| 1411 | 
         
             
                        fn=update_model_and_requery_problem_left,
         
     | 
| 1412 | 
         
             
                        inputs=[comp_model_dropdown_left, comp_dataset_state, comp_problem_state_input],
         
     | 
| 1413 | 
         
            +
                        outputs=[comp_problem_grid_html_output_left, comp_model_state_left, comp_problem_markdown_output, comp_answer_markdown_output, comp_samples_grid_output_left, comp_samples_data_state_left, comp_sample_metadata_output_left, comp_sample_response_output_left]
         
     | 
| 1414 | 
         
             
                    ).then(
         
     | 
| 1415 | 
         
             
                        # 重置Sample Number为0
         
     | 
| 1416 | 
         
             
                        fn=lambda: "0",
         
     | 
| 
         | 
|
| 1422 | 
         
             
                    comp_model_dropdown_right.change(
         
     | 
| 1423 | 
         
             
                        fn=update_model_and_requery_problem_right,
         
     | 
| 1424 | 
         
             
                        inputs=[comp_model_dropdown_right, comp_dataset_state, comp_problem_state_input],
         
     | 
| 1425 | 
         
            +
                        outputs=[comp_problem_grid_html_output_right, comp_model_state_right, comp_samples_grid_output_right, comp_samples_data_state_right, comp_sample_metadata_output_right, comp_sample_response_output_right]
         
     | 
| 1426 | 
         
             
                    ).then(
         
     | 
| 1427 | 
         
             
                        # 重置Sample Number为0
         
     | 
| 1428 | 
         
             
                        fn=lambda: "0",
         
     | 
| 
         | 
|
| 1430 | 
         
             
                        outputs=[comp_sample_number_input_right]
         
     | 
| 1431 | 
         
             
                    )
         
     | 
| 1432 | 
         | 
| 1433 | 
         
            +
                    # 左侧样本选择
         
     | 
| 1434 | 
         
            +
                    comp_sample_number_input_left.change(
         
     | 
| 1435 | 
         
            +
                        fn=handle_sample_select,
         
     | 
| 1436 | 
         
            +
                        inputs=[comp_sample_number_input_left, comp_samples_data_state_left],
         
     | 
| 1437 | 
         
            +
                        outputs=[comp_sample_metadata_output_left, comp_sample_response_output_left]
         
     | 
| 1438 | 
         
            +
                    )
         
     | 
| 1439 | 
         
            +
                    
         
     | 
| 1440 | 
         
            +
                    # 右侧样本选择
         
     | 
| 1441 | 
         
            +
                    comp_sample_number_input_right.change(
         
     | 
| 1442 | 
         
            +
                        fn=handle_sample_select,
         
     | 
| 1443 | 
         
            +
                        inputs=[comp_sample_number_input_right, comp_samples_data_state_right],
         
     | 
| 1444 | 
         
            +
                        outputs=[comp_sample_metadata_output_right, comp_sample_response_output_right]
         
     | 
| 1445 | 
         
            +
                    )
         
     | 
| 1446 | 
         
            +
                    
         
     | 
| 1447 | 
         
            +
                    # 为比较页面问题选择事件添加处理
         
     | 
| 1448 | 
         
             
                    comp_problem_state_input.change(
         
     | 
| 1449 | 
         
             
                        fn=handle_problem_select_comparison,
         
     | 
| 1450 | 
         
             
                        inputs=[comp_problem_state_input, comp_model_state_left, comp_dataset_state],
         
     | 
| 
         | 
|
| 1457 | 
         
             
                    ).then(
         
     | 
| 1458 | 
         
             
                        fn=handle_first_sample,
         
     | 
| 1459 | 
         
             
                        inputs=[comp_samples_data_state_left],
         
     | 
| 1460 | 
         
            +
                        outputs=[comp_sample_metadata_output_left, comp_sample_response_output_left]
         
     | 
| 1461 | 
         
             
                    )
         
     | 
| 1462 | 
         | 
| 1463 | 
         
             
                    # 问题选择事件 - 右侧模型
         
     | 
| 
         | 
|
| 1473 | 
         
             
                    ).then(
         
     | 
| 1474 | 
         
             
                        fn=handle_first_sample,
         
     | 
| 1475 | 
         
             
                        inputs=[comp_samples_data_state_right],
         
     | 
| 1476 | 
         
            +
                        outputs=[comp_sample_metadata_output_right, comp_sample_response_output_right]
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1477 | 
         
             
                    )
         
     | 
| 1478 | 
         | 
| 1479 | 
         
             
                    # This is the crucial link: problem_state_input is changed by user, triggers this Python callback.
         
     | 
| 
         | 
|
| 1480 | 
         
             
                    problem_state_input.change(
         
     | 
| 1481 | 
         
             
                        fn=handle_problem_select,
         
     | 
| 1482 | 
         
             
                        inputs=[problem_state_input, current_model_state, current_dataset_state],
         
     | 
| 
         | 
|
| 1489 | 
         
             
                    ).then(
         
     | 
| 1490 | 
         
             
                        fn=handle_first_sample,
         
     | 
| 1491 | 
         
             
                        inputs=[current_samples_data_state],
         
     | 
| 1492 | 
         
            +
                        outputs=[sample_metadata_output, sample_response_output]
         
     | 
| 1493 | 
         
             
                    )
         
     | 
| 1494 | 
         | 
| 1495 | 
         
             
                    # Also listen for direct input event which may be more reliable than change
         
     | 
| 
         | 
|
| 1505 | 
         
             
                    ).then(
         
     | 
| 1506 | 
         
             
                        fn=handle_first_sample,
         
     | 
| 1507 | 
         
             
                        inputs=[current_samples_data_state],
         
     | 
| 1508 | 
         
            +
                        outputs=[sample_metadata_output, sample_response_output]
         
     | 
| 1509 | 
         
             
                    )
         
     | 
| 1510 | 
         | 
| 1511 | 
         
             
                    # 添加样本编号的事件处理
         
     | 
| 1512 | 
         
             
                    sample_number_input.change(
         
     | 
| 1513 | 
         
             
                        fn=handle_sample_select,
         
     | 
| 1514 | 
         
             
                        inputs=[sample_number_input, current_samples_data_state],
         
     | 
| 1515 | 
         
            +
                        outputs=[sample_metadata_output, sample_response_output]
         
     | 
| 1516 | 
         
             
                    )
         
     | 
| 1517 | 
         | 
| 1518 | 
         
             
                    sample_number_input.input(
         
     | 
| 1519 | 
         
             
                        fn=handle_sample_select,
         
     | 
| 1520 | 
         
             
                        inputs=[sample_number_input, current_samples_data_state],
         
     | 
| 1521 | 
         
            +
                        outputs=[sample_metadata_output, sample_response_output]
         
     | 
| 1522 | 
         
             
                    )
         
     | 
| 1523 | 
         | 
| 1524 | 
         
             
                    # 修改model_dropdown.change处理函数,以重新查询当前问题响应
         
     | 
| 
         | 
|
| 1531 | 
         
             
                            problem_content, answer_content, samples_grid_html, new_samples_data = handle_problem_select(current_problem_id, new_model_state, current_dataset)
         
     | 
| 1532 | 
         | 
| 1533 | 
         
             
                            # 获取第一个样本的内容
         
     | 
| 1534 | 
         
            +
                            first_metadata, first_response = handle_first_sample(new_samples_data)
         
     | 
| 1535 | 
         | 
| 1536 | 
         
            +
                            return stats_df, grid_html, new_model_state, problem_content, answer_content, samples_grid_html, new_samples_data, first_metadata, first_response
         
     | 
| 1537 | 
         
             
                        else:
         
     | 
| 1538 | 
         
             
                            # 没有问题ID,只返回更新的模型状态
         
     | 
| 1539 | 
         
            +
                            return stats_df, grid_html, new_model_state, "Please fill in all the fields.", "No answer available.", "", gr.State([]), "<div>Select a problem first to view samples.</div>", ""
         
     | 
| 1540 | 
         | 
| 1541 | 
         
             
                    model_dropdown.change(
         
     | 
| 1542 | 
         
             
                        fn=update_model_and_requery_problem,
         
     | 
| 1543 | 
         
             
                        inputs=[model_dropdown, current_dataset_state, problem_state_input], 
         
     | 
| 1544 | 
         
            +
                        outputs=[model_stats_df, problem_grid_html_output, current_model_state, problem_markdown_output, answer_markdown_output, samples_grid_output, current_samples_data_state, sample_metadata_output, sample_response_output]
         
     | 
| 1545 | 
         
             
                    ).then(
         
     | 
| 1546 | 
         
             
                        # 重置Sample Number为0
         
     | 
| 1547 | 
         
             
                        fn=lambda: "0",
         
     | 
| 
         | 
|
| 1562 | 
         | 
| 1563 | 
         
             
                    # 如果内存使用超过12GB (激进设置),清理缓存
         
     | 
| 1564 | 
         
             
                    if memory_usage_mb > 12000:  # 12GB
         
     | 
| 
         | 
|
| 1565 | 
         
             
                        if db:
         
     | 
| 1566 | 
         
             
                            db.clear_cache('response')  # 优先清理响应缓存
         
     | 
| 1567 | 
         
             
                            gc.collect()
         
     | 
| 1568 | 
         
             
                    # 如果内存使用超过14GB,更激进地清理
         
     | 
| 1569 | 
         
             
                    if memory_usage_mb > 14000:  # 14GB
         
     | 
| 
         | 
|
| 1570 | 
         
             
                        if db:
         
     | 
| 1571 | 
         
             
                            db.clear_cache()  # 清理所有缓存
         
     | 
| 1572 | 
         
             
                            gc.collect()
         
     | 
| 1573 | 
         | 
| 1574 | 
         
             
                    return f"Memory: {memory_usage_mb:.1f} MB"
         
     | 
| 1575 | 
         
             
                except Exception as e:
         
     | 
| 
         | 
|
| 1576 | 
         
             
                    return "Memory monitor error"
         
     | 
| 1577 | 
         | 
| 1578 | 
         
             
            # 修改主函数以使用优化策略
         
     | 
| 
         | 
|
| 1581 | 
         | 
| 1582 | 
         
             
                # 检查数据库文件是否存在,如果不存在则从 Hugging Face 下载
         
     | 
| 1583 | 
         
             
                if not os.path.exists(DB_PATH):
         
     | 
| 
         | 
|
| 1584 | 
         
             
                    try:
         
     | 
| 1585 | 
         
             
                        # 从环境变量获取 HF_TOKEN
         
     | 
| 1586 | 
         
             
                        hf_token = os.environ.get("HF_TOKEN")
         
     | 
| 
         | 
|
| 1594 | 
         
             
                            repo_type="dataset",
         
     | 
| 1595 | 
         
             
                            token=hf_token
         
     | 
| 1596 | 
         
             
                        )
         
     | 
| 
         | 
|
| 1597 | 
         
             
                    except Exception as e:
         
     | 
| 
         | 
|
| 1598 | 
         
             
                        # 创建一个显示错误信息的简单 Gradio 应用
         
     | 
| 1599 | 
         
             
                        with gr.Blocks() as error_demo:
         
     | 
| 1600 | 
         
             
                            gr.Markdown(f"# Error: Database Download Failed\n{str(e)}\nPlease ensure HF_TOKEN is set correctly and try again.")
         
     | 
| 
         | 
|
| 1602 | 
         
             
                        exit(1)
         
     | 
| 1603 | 
         | 
| 1604 | 
         
             
                if os.path.exists(DB_PATH):
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1605 | 
         
             
                    # 创建UI并启动
         
     | 
| 
         | 
|
| 1606 | 
         
             
                    db = ModelDatabase(DB_PATH)
         
     | 
| 1607 | 
         | 
| 1608 | 
         
             
                    # 添加清理函数
         
     | 
| 1609 | 
         
             
                    def cleanup():
         
     | 
| 1610 | 
         
             
                        global db
         
     | 
| 1611 | 
         
             
                        if db:
         
     | 
| 
         | 
|
| 1612 | 
         
             
                            db.close()
         
     | 
| 1613 | 
         | 
| 1614 | 
         
             
                    # 注册清理函数
         
     | 
| 
         | 
|
| 1625 | 
         
             
                        inbrowser=False
         
     | 
| 1626 | 
         
             
                    )
         
     | 
| 1627 | 
         
             
                else:
         
     | 
| 1628 | 
         
            +
                    # 创建一个显示错误信息的简单 Gradio 应用
         
     | 
| 
         | 
|
| 1629 | 
         
             
                    with gr.Blocks() as error_demo:
         
     | 
| 1630 | 
         
             
                        gr.Markdown(f"# Error: Database Not Found\nCould not find `{DB_PATH}`. Please ensure the database file is correctly placed and accessible.")
         
     | 
| 1631 | 
         
             
                    error_demo.launch(server_name="0.0.0.0")
         
     |