Update app.py

app.py CHANGED
@@ -850,6 +850,7 @@ def check_model_availability(models, token):
 
 # Streamlit UI
 st.title("AutoBench 1.0")
+st.write("A Many-Model-As-Judge system to benchmark LLMs.\nChoose the models you want to evaluate (at least 2) and they will rank each other against the selected topics. But first, check if the models are available (this may depend on your Hugging Face account).")
 
 # Setup sidebar for configuration
 st.sidebar.header("Configuration")
@@ -947,8 +948,7 @@ if st.sidebar.button("Start Benchmark"):
     # Run the benchmark
     try:
         # Update status
-        status_text.text("Benchmark running
-        status_text.text("\n\nThis will take a few minutes depending on the number of models and iterations chosen. If you want to follow progress in the question-answer-ranking process per each iteration, check the container log (above, next to the \"running\" button")
+        status_text.text("Benchmark running...\n\nThis will take a few minutes depending on the number of models and iterations chosen. To follow progress through the question-answer-ranking process for each iteration, check the container log (above, next to the \"Running\" button).")
 
         # Run benchmark and get results
         results, cumulative_avg_rank, total_successful = run_benchmark(
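The single merged call matters because status_text is a Streamlit placeholder: each .text() call replaces the placeholder's contents rather than appending to them, so the second of the two removed calls would have overwritten the first. A minimal sketch of the pattern, assuming status_text = st.empty() is assigned earlier in app.py (the assignment is not shown in this diff):

import time

import streamlit as st

# st.empty() reserves a single slot in the page. Each call to one of its
# element methods (.text(), .write(), ...) replaces the slot's contents,
# so consecutive .text() calls do not accumulate; only the last one shows.
status_text = st.empty()

status_text.text("Benchmark running...\n\nThis will take a few minutes.")
time.sleep(2)  # stand-in for the real benchmark work
status_text.text("Benchmark complete.")  # overwrites the previous message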
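One caveat on the new intro line: st.write renders strings as Markdown, where a lone "\n" collapses into a space, so the line break after "...benchmark LLMs." will not display as written. A small sketch under that assumption, using a blank line ("\n\n") to force the break (wording taken from the commit):

import streamlit as st

st.title("AutoBench 1.0")
# st.write treats the string as Markdown: a single "\n" is collapsed,
# while "\n\n" starts a new paragraph and renders as a visible break.
st.write(
    "A Many-Model-As-Judge system to benchmark LLMs.\n\n"
    "Choose the models you want to evaluate (at least 2) and they will "
    "rank each other against the selected topics."
)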