Spaces:

MrSimple01
/

RuSimulBench_arena

Sleeping

App Files Files Community

MrSimple01 committed on Mar 20

Commit

7149d5b

verified ·

1 Parent(s): 145fdde

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -7

app.py CHANGED Viewed

@@ -113,18 +113,27 @@ def evaluate_models(file_path, api_key, prompt_col, selected_models=None, progre
     combined_benchmark_path = f'results/benchmark_results_{timestamp}.csv'
     benchmark_df.to_csv(combined_benchmark_path, index=False)
-    # Create visualizations
     progress(0.95, desc="Creating visualizations...")
     radar_chart_path = create_radar_chart(all_results)
     bar_chart_path = create_bar_chart(all_results)
     progress(1.0, desc="Evaluation complete!")
-    # Sort results by combined score
     sorted_results = benchmark_df.sort_values(by='combined_score', ascending=False)
     return sorted_results, radar_chart_path, bar_chart_path, combined_benchmark_path
 def create_gradio_interface():
     with gr.Blocks(title="LLM Evaluation Tool") as app:
         gr.Markdown("# LLM Evaluation Tool")
@@ -188,11 +197,9 @@ def create_gradio_interface():
             with gr.Row():
                 gr.Markdown("### Leaderboard Details")
                 gr.Markdown("""
-                - **Креативность**: Оригинальность и инновационность ответов
-                - **Разнообразие**: Использование различных языковых средств и стилистических приемов
-                - **Релевантность**: Соответствие ответа исходному запросу
-                - **Стабильность**: Насколько хорошо модель сохраняет смысл и контекст запроса
-                - **Общий балл**: Среднее значение всех показателей
                 """)
     return app

     combined_benchmark_path = f'results/benchmark_results_{timestamp}.csv'
     benchmark_df.to_csv(combined_benchmark_path, index=False)
     progress(0.95, desc="Creating visualizations...")
     radar_chart_path = create_radar_chart(all_results)
     bar_chart_path = create_bar_chart(all_results)
     progress(1.0, desc="Evaluation complete!")
     sorted_results = benchmark_df.sort_values(by='combined_score', ascending=False)
     return sorted_results, radar_chart_path, bar_chart_path, combined_benchmark_path
+def get_leaderboard_data():  # Return the hard-coded leaderboard rows, one list of strings per model.
+    return [  # Row layout: [model name, three numeric strings]; presumably creativity (up to 10), stability (0-1), overall score (0-1) per the "Leaderboard Details" text — TODO confirm
+        ["Vikhr", "7.75", "0.9363600260019302", "0.860"],
+        ["Llama3", "7.30", "0.9410231244564057", "0.827"],
+        ["Mistral", "6.95", "0.9459488660097122", "0.807"],
+        ["Owen", "6.93", "0.945682458281517", "0.800"],  # NOTE(review): "Owen" may be a typo for "Qwen" — confirm with the author
+        ["TinyLlama", "1.12", "0.945682458281517", "0.573"]  # NOTE(review): second value is identical to the "Owen" row — verify it is not a copy-paste slip
+    ]
 def create_gradio_interface():
     with gr.Blocks(title="LLM Evaluation Tool") as app:
         gr.Markdown("# LLM Evaluation Tool")
             with gr.Row():
                 gr.Markdown("### Leaderboard Details")
                 gr.Markdown("""
+                - **Креативность**: Оригинальность и инновационность ответов (шкала до 10)
+                - **Стабильность**: Коэффициент стабильности модели (0-1)
+                - **Общий балл**: Средний комбинированный показатель производительности (0-1)
                 """)
     return app