Update app.py
app.py (changed)
@@ -849,8 +849,8 @@ def check_model_availability(models, token):
     return availability_results
 
 # Streamlit UI
-st.title("AutoBench 1.0")
-
+st.title("AutoBench 1.0 Lite")
+status_text.text("A Many-Model-As-Judge system to generate a customizable LLM benchmark.\nChoose the models you want to evaluate (at least 2) and they will rank each other against the selected topics. But first check if the models are available (this will depend on your Hugging Face account; Premium is strongly recommended).")
 
 # Setup sidebar for configuration
 st.sidebar.header("Configuration")
@@ -948,7 +948,7 @@ if st.sidebar.button("Start Benchmark"):
     # Run the benchmark
     try:
         # Update status
-        status_text.text("Benchmark running...\n\nThis will take a few minutes depending on the number of models and iterations chosen. If you want to follow progress in the question-answer-ranking process per each iteration, check the container log (above, next to the \"running\" button")
+        status_text.text("Benchmark running...\n\nThis will take a few minutes depending on the number of models and iterations chosen. If you want to follow progress in the question-answer-ranking process for each iteration, check the container log (above, next to the \"running\" button).")
 
         # Run benchmark and get results
         results, cumulative_avg_rank, total_successful = run_benchmark(
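Both hunks route user-facing messages through status_text, so the new call at line 853 assumes that placeholder already exists at that point in app.py. Below is a minimal sketch of the surrounding Streamlit flow, assuming status_text is an st.empty() placeholder and stubbing out the run_benchmark call; the placeholder creation and the error handling are illustrative assumptions, not part of the commit.

import streamlit as st

st.title("AutoBench 1.0 Lite")

# Placeholder that later code overwrites with status/progress messages.
status_text = st.empty()
status_text.text("A Many-Model-As-Judge system to generate a customizable LLM benchmark.")

st.sidebar.header("Configuration")

if st.sidebar.button("Start Benchmark"):
    try:
        # Reuse the same placeholder to report progress while the benchmark runs.
        status_text.text("Benchmark running...\n\nCheck the container log for per-iteration progress.")
        # results, cumulative_avg_rank, total_successful = run_benchmark(...)  # stubbed out
    except Exception as exc:
        status_text.text(f"Benchmark failed: {exc}")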