PeterKruger committed · commit 787c817 · verified · Parent(s): 77b8565

Update app.py

Files changed (1): app.py (+2, -2)
app.py CHANGED
@@ -850,6 +850,7 @@ def check_model_availability(models, token):
 
 # Streamlit UI
 st.title("AutoBench 1.0")
+st.write("A Many-Model-As-Judge system to benchmark LLMs.\nChoose the models you want to evaluate (at least 2) and they will rank each other on the selected topics. But first, check that the models are available (this may depend on your Hugging Face account).")
 
 # Setup sidebar for configuration
 st.sidebar.header("Configuration")
@@ -947,8 +948,7 @@ if st.sidebar.button("Start Benchmark"):
     # Run the benchmark
     try:
         # Update status
-        status_text.text("Benchmark running...")
-        status_text.text("\n\nThis will take a few minutes depending on the number of models and iterations chosen. If you want to follow progress in the question-answer-ranking process per each iteration, check the container log (above, next to the \"running\" button")
+        status_text.text("Benchmark running...\n\nThis will take a few minutes depending on the number of models and iterations chosen. If you want to follow the progress of the question-answer-ranking process for each iteration, check the container log (above, next to the \"running\" button).")
 
         # Run benchmark and get results
         results, cumulative_avg_rank, total_successful = run_benchmark(
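
The two status_text.text() calls were merged because a Streamlit placeholder only shows its most recent content, so the second call in the old code overwrote "Benchmark running..." rather than appending to it. Below is a minimal sketch of that behaviour; it assumes status_text is a placeholder created with st.empty(), which is how the variable appears to be used in app.py but is not shown in this diff.

import streamlit as st

st.title("AutoBench 1.0")

# Assumption: elsewhere in app.py, status_text is a placeholder like this.
# A placeholder holds a single element; each call to .text() replaces
# whatever the placeholder was showing before.
status_text = st.empty()

# Old behaviour: the second call overwrites the first, so the
# "Benchmark running..." heading disappears as soon as the details appear.
status_text.text("Benchmark running...")
status_text.text("\n\nThis will take a few minutes depending on the number of models and iterations chosen.")

# New behaviour: one call containing both parts keeps the full message visible.
status_text.text(
    "Benchmark running...\n\n"
    "This will take a few minutes depending on the number of models "
    "and iterations chosen."
)

When run with streamlit run, only the text from the last call remains on screen, which is why the old code never displayed both messages and why the commit combines them into a single call.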