PeterKruger committed
Commit 2641425 · verified · 1 Parent(s): 787c817

Update app.py

Files changed (1): app.py (+3 −3)
app.py CHANGED
@@ -849,8 +849,8 @@ def check_model_availability(models, token):
     return availability_results
 
 # Streamlit UI
-st.title("AutoBench 1.0")
-st.write(f"A Many-Model-As-Judge system to benchmark LLMs.\nChose the models you want to evaluate (at least 2) and they will rank each other against the selected topics. But first if models are available (this may depend on your Hugging face account.")
+st.title("AutoBench 1.0 Lite")
+status_text.text("A Many-Model-As-Judge system to generate a customizable LLM benchmark.\nChose the models you want to evaluate (at least 2) and they will rank each other against the selected topics. But first check if models are available (this will depend on your Hugging face account. Premium is strongly recomended)")
 
 # Setup sidebar for configuration
 st.sidebar.header("Configuration")
@@ -948,7 +948,7 @@ if st.sidebar.button("Start Benchmark"):
     # Run the benchmark
     try:
         # Update status
-        status_text.text("Benchmark running...\n\nThis will take a few minutes depending on the number of models and iterations chosen. If you want to follow progress in the question-answer-ranking process per each iteration, check the container log (above, next to the \"running\" button")
+        status_text.text("Benchmark running...\n\nThis will take a few minutes depending on the number of models and iterations chosen. If you want to follow progress in the question-answer-ranking process per each iteration, check the container log (above, next to the \"running\" button)")
 
         # Run benchmark and get results
         results, cumulative_avg_rank, total_successful = run_benchmark(
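
The new line 853 pushes the intro text through status_text.text(...) instead of st.write(...), and line 951 reuses the same call for the progress message, but the diff does not show where status_text is created. Below is a minimal sketch of the Streamlit placeholder pattern these lines appear to rely on; it assumes status_text comes from st.empty() and substitutes a short sleep for the real run_benchmark(...) call, so names and placement are illustrative, not taken from app.py.

# Sketch only: status_text as an st.empty() placeholder, updated in place.
import time
import streamlit as st

st.title("AutoBench 1.0 Lite")
status_text = st.empty()  # assumption: created before the intro text uses it
status_text.text("A Many-Model-As-Judge system to generate a customizable LLM benchmark.")

st.sidebar.header("Configuration")
if st.sidebar.button("Start Benchmark"):
    try:
        # Each .text() call overwrites the previous message in the same slot.
        status_text.text("Benchmark running...\n\nCheck the container log to follow progress.")
        time.sleep(2)  # stand-in for the real run_benchmark(...) call
        status_text.text("Benchmark complete.")
    except Exception as exc:
        status_text.text(f"Benchmark failed: {exc}")

Because .text() rewrites the same placeholder, the intro message set at line 853 is replaced once the run starts, which is presumably the point of switching away from st.write.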