Update app.py
app.py CHANGED
@@ -477,7 +477,6 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
     results = {
         "model_name": [],
         "topic": [],
-        "difficulty": [],
         "question_prompt": [],
         "question": [],
         "answer": [],
@@ -759,7 +758,6 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
 
     results["model_name"].append(model_id)
     results["topic"].append(topic)
-    results["difficulty"].append(difficulty)
     results["question_prompt"].append(question_prompt)
     results["question"].append(question)
     results["answer"].append(answer)
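These two hunks drop the "difficulty" column from the results accumulator in both places it is touched: the dict initialization and the per-row append block. The two must change together, since a dict of lists only converts cleanly to a table when every column ends up the same length. A minimal sketch of the pattern (assuming, as a benchmark UI suggests, that a pandas DataFrame is eventually built from it — an assumption, not shown in this diff):

```python
# Sketch only, not the app's actual code: why the dict keys and the
# per-row append calls must stay in sync after removing "difficulty".
import pandas as pd

results = {
    "model_name": [],
    "topic": [],
    "question_prompt": [],
    "question": [],
    "answer": [],
}

# One row is appended column by column; every key must receive exactly
# one value per row, otherwise the lists drift out of step.
row = {
    "model_name": "model-a",       # hypothetical values for illustration
    "topic": "math",
    "question_prompt": "prompt",
    "question": "q",
    "answer": "a",
}
for key, value in row.items():
    results[key].append(value)

# pd.DataFrame raises "All arrays must be of the same length" if any
# column was appended to more or fewer times than the others.
df = pd.DataFrame(results)
print(df.shape)  # (1, 5)
```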
@@ -851,7 +849,7 @@ def check_model_availability(models, token):
     return availability_results
 
 # Streamlit UI
-st.title("
+st.title("AutoBench 1.0")
 
 # Setup sidebar for configuration
 st.sidebar.header("Configuration")
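The removed line left st.title with an unterminated string literal — a SyntaxError that would keep the Space from booting at all; the replacement closes it with the app's name. A minimal sketch of the page setup this hunk and its context imply (st.set_page_config is an illustrative assumption, not part of the diff):

```python
import streamlit as st

# Assumed for illustration; not shown in the diff. Must be the first
# Streamlit command if used.
st.set_page_config(page_title="AutoBench 1.0")

st.title("AutoBench 1.0")           # main page heading, from the diff
st.sidebar.header("Configuration")  # sidebar section, from the diff's context
```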
@@ -950,6 +948,7 @@ if st.sidebar.button("Start Benchmark"):
     try:
         # Update status
         status_text.text("Benchmark running...")
+        status_text.text("This will take a few minutes depending on the number of models and iterations chosen. If you want to follow progress in the question-answer-ranking process for each iteration, check the container log (above, next to the \"running\" button).")
 
         # Run benchmark and get results
         results, cumulative_avg_rank, total_successful = run_benchmark(
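The added call presumably writes into the same placeholder as the line above it. A minimal sketch, assuming status_text is an st.empty() placeholder (its creation is not shown in this diff): with st.empty(), each .text() call replaces the placeholder's previous content, so the longer message supersedes "Benchmark running..." rather than appearing under it.

```python
import streamlit as st

# Assumption: status_text is created as an st.empty() placeholder
# somewhere above the hunk (not shown in the diff).
status_text = st.empty()

status_text.text("Benchmark running...")
# This second call replaces the first message in place:
status_text.text(
    "This will take a few minutes depending on the number of models "
    "and iterations chosen."
)
```

If both messages were meant to stay visible, two separate placeholders (or st.info for the longer note) would be needed; as written, only the second survives, which may well be the intent here.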
|