Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -779,8 +779,8 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
|
|
779 |
|
780 |
# --- Print and store iteration results IMMEDIATELY after ranking for this model ---
|
781 |
ranks_str = "[" + ", ".join(map(str, [ranks.get(m, None) for m in active_models])) + "]" if ranks else "[]" # Format ranks for CSV, ensure order, use .get() for safety
|
782 |
-
print(f"{topic}, {difficulty_mapping[difficulty]}, {model_id}, Avg Rank: {cumulative_avg_rank.get(model_id, np.nan):.2f}, Avg Rank for run: {average_rank:.
|
783 |
-
st.write(f"{topic}, {difficulty_mapping[difficulty]}, {model_id}, Avg Rank: {cumulative_avg_rank.get(model_id, np.nan):.2f}, Avg Rank for run: {average_rank:.
|
784 |
|
785 |
# Write iteration results to file (append mode) - write for each model right after ranking
|
786 |
iteration_results_file_opened.write(f"{iteration+1},{topic}, {difficulty_mapping[difficulty]},{question_avg_rank:.2f},{question_ranking_duration_total:.2f},{model_id},{cumulative_avg_rank.get(model_id, np.nan):.2f},{average_rank:.2f},{ranks_str},{ranking_duration:.2f}\n")
|
@@ -968,7 +968,7 @@ if st.sidebar.button("Start Benchmark"):
|
|
968 |
|
969 |
# Update progress to complete
|
970 |
progress_bar.progress(100)
|
971 |
-
|
972 |
|
973 |
# Display results
|
974 |
if total_successful > 0:
|
|
|
779 |
|
780 |
# --- Print and store iteration results IMMEDIATELY after ranking for this model ---
|
781 |
ranks_str = "[" + ", ".join(map(str, [ranks.get(m, None) for m in active_models])) + "]" if ranks else "[]" # Format ranks for CSV, ensure order, use .get() for safety
|
782 |
+
print(f"{topic}, {difficulty_mapping[difficulty]}, {model_id}, Avg Rank: {cumulative_avg_rank.get(model_id, np.nan):.2f}, Avg Rank for run: {average_rank:.2f}, Ranks: {ranks_str}, {ranking_duration:.2f} s")
|
783 |
+
st.write(f"{topic}, {difficulty_mapping[difficulty]}, {model_id}, Avg Rank: {cumulative_avg_rank.get(model_id, np.nan):.2f}, Avg Rank for run: {average_rank:.2f}, Ranks: {ranks_str}, {ranking_duration:.2f} s")
|
784 |
|
785 |
# Write iteration results to file (append mode) - write for each model right after ranking
|
786 |
iteration_results_file_opened.write(f"{iteration+1},{topic}, {difficulty_mapping[difficulty]},{question_avg_rank:.2f},{question_ranking_duration_total:.2f},{model_id},{cumulative_avg_rank.get(model_id, np.nan):.2f},{average_rank:.2f},{ranks_str},{ranking_duration:.2f}\n")
|
|
|
968 |
|
969 |
# Update progress to complete
|
970 |
progress_bar.progress(100)
|
971 |
+
st.subheader(f"Benchmark completed! {total_successful} successful iterations.")
|
972 |
|
973 |
# Display results
|
974 |
if total_successful > 0:
|