PeterKruger committed
Commit 5c21f77 · verified · 1 parent: e0e94ac

Update app.py

Files changed (1)
  1. app.py +2 -3
app.py CHANGED
@@ -477,7 +477,6 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
     results = {
         "model_name": [],
         "topic": [],
-        "difficulty": [],
         "question_prompt": [],
         "question": [],
         "answer": [],
@@ -759,7 +758,6 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
 
             results["model_name"].append(model_id)
             results["topic"].append(topic)
-            results["difficulty"].append(difficulty)
             results["question_prompt"].append(question_prompt)
             results["question"].append(question)
             results["answer"].append(answer)
@@ -851,7 +849,7 @@ def check_model_availability(models, token):
     return availability_results
 
 # Streamlit UI
-st.title("LLM Benchmark")
+st.title("AutoBench 1.0")
 
 # Setup sidebar for configuration
 st.sidebar.header("Configuration")
@@ -950,6 +948,7 @@ if st.sidebar.button("Start Benchmark"):
     try:
         # Update status
         status_text.text("Benchmark running...")
+        status_text.text("This will take a few minutes depending on the number of models and iterations chosen. If you want to follow progress in the question-answer-ranking process per each iteration, check the container log (above, next to the \"running\" button)")
 
         # Run benchmark and get results
         results, cumulative_avg_rank, total_successful = run_benchmark(