Update app.py
app.py CHANGED
@@ -12,13 +12,17 @@ import sys
 import io
 from contextlib import redirect_stdout, redirect_stderr

-# Initialize session state variables
+# Initialize session state variables properly at the very beginning
 if 'main_output' not in st.session_state:
-    st.session_state
+    st.session_state['main_output'] = []
 if 'debug_output' not in st.session_state:
-    st.session_state
+    st.session_state['debug_output'] = []
 if 'progress' not in st.session_state:
-    st.session_state
+    st.session_state['progress'] = 0
+if 'results_df' not in st.session_state:
+    st.session_state['results_df'] = pd.DataFrame()
+if 'is_running' not in st.session_state:
+    st.session_state['is_running'] = False

 # FILES
 iteration_output_file = "llm_benchmark_iteration_results.csv"  # File to store iteration results, defined as global
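For reference, the guarded assignments added above are the standard Streamlit idiom for state that must survive reruns: the script is re-executed top to bottom on every interaction, so each key is created only when it is missing. A minimal, self-contained sketch of the same idiom, reusing the key names from this diff (the default values here are illustrative):

import pandas as pd
import streamlit as st

# Create each key once; later reruns keep whatever is already stored.
defaults = {
    "main_output": [],             # accumulated progress log lines
    "debug_output": [],            # accumulated debug messages
    "progress": 0.0,               # fraction of benchmark iterations completed
    "results_df": pd.DataFrame(),  # last benchmark results
    "is_running": False,           # set while the benchmark is executing
}
for key, value in defaults.items():
    if key not in st.session_state:
        st.session_state[key] = value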
@@ -55,21 +59,15 @@ def custom_print(*args, **kwargs):
     output = ' '.join(map(str, args))

     # Add to main output list
-    st.session_state
+    st.session_state['main_output'].append(output)

     # Also print to standard output for console logging
     print(*args, **kwargs)
-
-    # Force an immediate update of the UI (when used inside a function)
-    st.session_state.update_counter = st.session_state.get('update_counter', 0) + 1

 # Custom function to capture warnings and errors
 def log_debug(message):
-    st.session_state
+    st.session_state['debug_output'].append(message)
     print(f"DEBUG: {message}", file=sys.stderr)
-
-    # Force an immediate update of the UI
-    st.session_state.update_counter = st.session_state.get('update_counter', 0) + 1

 def retry_api_request(max_retries=3, wait_time=10):
     """Decorator for retrying API requests with rate limit handling."""
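The retry_api_request decorator appears in this hunk only as a signature and docstring; its body is outside the diff. A hedged sketch of what a retry-with-wait decorator of that shape could look like (the generic Exception handling and the fixed wait are assumptions, not the Space's actual code):

import functools
import time

def retry_api_request(max_retries=3, wait_time=10):
    """Decorator for retrying API requests with rate limit handling."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except Exception:  # assumption: the real code likely checks for HTTP 429 / rate-limit errors
                    if attempt == max_retries - 1:
                        raise
                    time.sleep(wait_time)  # wait before the next attempt
        return wrapper
    return decorator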
@@ -511,6 +509,8 @@ def get_answer_from_model(model_id, question, consecutive_failures, failure_thre

 # --- Core Logic ---
 def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
+    st.session_state['is_running'] = True
+
     results = {
         "model_name": [],
         "topic": [],
@@ -561,7 +561,7 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):

     for iteration in range(t):  # Added iteration counter
         # Update progress in the Streamlit app
-        st.session_state
+        st.session_state['progress'] = (iteration + 1) / t

         if len(active_models) < 2:
             custom_print("Fewer than 2 active models remaining. Exiting benchmark.")
@@ -647,8 +647,11 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
                 question_ranking_futures.append(future)

         for future in concurrent.futures.as_completed(question_ranking_futures):  # Collect ranks as they become available
-
-
+            try:
+                ranking_model_id, rank = future.result()  # Get model_id and rank
+                question_ranks[ranking_model_id] = rank  # Store rank with model_id as key
+            except Exception as e:
+                log_debug(f"Error getting question rank result: {e}")

         question_ranking_end_time = time.time()
         question_ranking_duration_total = question_ranking_end_time - question_ranking_start_time
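The change above wraps each future.result() call in try/except so that one failed ranking request is logged and skipped instead of aborting the whole collection loop. A reduced, standalone sketch of the same concurrent.futures pattern (rank_question is a made-up stand-in for the Space's ranking call):

import concurrent.futures

def rank_question(model_id):
    # Stand-in worker: pretend every model returns a rank of 5.
    return model_id, 5

question_ranks = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(rank_question, m) for m in ["model-a", "model-b"]]
    for future in concurrent.futures.as_completed(futures):
        try:
            ranking_model_id, rank = future.result()
            question_ranks[ranking_model_id] = rank
        except Exception as e:
            print(f"Error getting question rank result: {e}")  # app.py routes this through log_debug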
@@ -674,11 +677,11 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):

         if question_avg_rank >= question_treshold and all(rank > reject_rank for rank in valid_question_ranks_values):  # Question acceptance criteria
             question_accepted = True
-            custom_print(f"Question accepted. Avg Question Rank: {question_avg_rank:.2f}, Min Rank: {min_question_rank}, Ranks: {[question_ranks
+            custom_print(f"Question accepted. Avg Question Rank: {question_avg_rank:.2f}, Min Rank: {min_question_rank}, Ranks: {[question_ranks.get(m, None) for m in active_models if m in question_ranks]}")
             s_t += 1
         else:
             question_accepted = False
-            custom_print(f"Question rejected. Avg Question Rank: {question_avg_rank:.2f}, Min Rank: {min_question_rank}, Ranks: {[question_ranks
+            custom_print(f"Question rejected. Avg Question Rank: {question_avg_rank:.2f}, Min Rank: {min_question_rank}, Ranks: {[question_ranks.get(m, None) for m in active_models if m in question_ranks]}")

         if not question_accepted:
             custom_print("Generated question was not accepted. Regenerating question.")
@@ -709,35 +712,43 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
                     token,
                     timeout=60
                 )
-                answer_futures.append(future)
-            except
-                log_debug(f"
-                answer = "
-                duration =
-                answers[model_id] = answer
-                answer_durations[model_id] = duration
-
-            for future in
-
-
-
-
+                answer_futures.append((model_id, future))
+            except Exception as e:
+                log_debug(f"Error submitting answer task for {model_id}: {e}")
+                answer = "Error answering - Task submission failed"
+                duration = 0
+                answers[model_id] = answer
+                answer_durations[model_id] = duration
+
+        for model_id, future in answer_futures:
+            try:
+                answer, duration = future.result()  # Get both answer and duration
+                answers[model_id] = answer
+                answer_durations[model_id] = duration
+            except Exception as e:
+                log_debug(f"Error getting answer from {model_id}: {e}")
+                answers[model_id] = "Error answering - Future result failed"
+                answer_durations[model_id] = 0

         # --- Ranking Process ---

         # Prepare to write to file (open in append mode outside the model loop but inside iteration loop)
-
-
-
+        try:
+            iteration_results_file_opened = open(iteration_output_file, 'a')
+            if iteration == 0:  # Write header only for the first iteration
+                iteration_results_file_opened.write("Iteration, Topic, Difficulty, Question Rank, QR Duration, Model,Cumulative Avg Rank,Iteration Avg Rank,Ranks,Ranking Duration (sec)\n")  # Added Ranking Duration to header
+        except Exception as e:
+            log_debug(f"Error opening results file: {e}")
+            iteration_results_file_opened = None


         for model_id in active_models:
-
-            if not answer:  # Add guard clause
+            if model_id not in answers:
                 log_debug(f"No answer found for model {model_id}. Skipping ranking.")
                 continue

-
+            answer = answers[model_id]
+            if answer == "Error answering" or answer.startswith("Error answering -"):  # Handle answer generation errors
                 consecutive_failures[model_id] += 1
                 if consecutive_failures[model_id] >= failure_threshold:
                     custom_print(f"Model {model_id} is consistently failing to answer. Removing from active models.")
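The main change in this hunk is submitting answers as (model_id, future) pairs and recording a per-model error string plus a zero duration on failure, so every result or exception can be attributed to the model that produced it. A trimmed sketch of that bookkeeping pattern (get_answer is a placeholder for the Space's get_answer_from_model call):

import concurrent.futures
import time

def get_answer(model_id, question):
    # Placeholder worker returning (answer, duration), the shape the real call is expected to have.
    start = time.time()
    return f"{model_id} says: 42", time.time() - start

answers, answer_durations = {}, {}
with concurrent.futures.ThreadPoolExecutor() as executor:
    answer_futures = [(m, executor.submit(get_answer, m, "What is entropy?"))
                      for m in ["model-a", "model-b"]]
    for model_id, future in answer_futures:  # iterate the pairs so failures map back to a model
        try:
            answers[model_id], answer_durations[model_id] = future.result()
        except Exception:
            answers[model_id] = "Error answering - Future result failed"
            answer_durations[model_id] = 0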
@@ -759,24 +770,30 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
             for ranking_model_id in active_models:
                 # --- Filter for ranking roles ("rank" or "both") ---
                 if model_config[ranking_model_id].get("role", "both") in ["rank", "both"]:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    try:
+                        future = executor.submit(
+                            get_rank_from_model,
+                            ranking_model_id,
+                            question,
+                            answer,
+                            consecutive_failures,
+                            failure_threshold,
+                            unresponsive_models,
+                            model_config,
+                            topic,
+                            token,
+                            timeout=60
+                        )
+                        ranking_futures.append(future)
+                    except Exception as e:
+                        log_debug(f"Error submitting ranking task for {ranking_model_id}: {e}")

             for future in concurrent.futures.as_completed(ranking_futures):  # Collect ranks as they become available
-
-
+                try:
+                    ranking_model_id, rank = future.result()  # Get model_id and rank
+                    ranks[ranking_model_id] = rank  # Store rank with model_id as key
+                except Exception as e:
+                    log_debug(f"Error getting rank result: {e}")

             ranking_end_time = time.time()  # Record end time of ranking
             ranking_duration = ranking_end_time - ranking_start_time  # Calculate duration
@@ -807,20 +824,29 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
             results["answer"].append(answer)
             results["answer_generation_duration"].append(answer_durations.get(model_id, 0))
             results["average_rank"].append(average_rank)
-            results["ranks"].append([ranks
+            results["ranks"].append([ranks.get(m, None) for m in active_models if m in ranks])  # Store raw ranks including Nones, ensure order
             results["question_rank_average"].append(question_avg_rank)  # Store question rank average
-            results["question_ranks"].append([question_ranks
+            results["question_ranks"].append([question_ranks.get(m, None) for m in active_models if m in question_ranks])  # Store question ranks
             results["question_rank_duration"].append(question_ranking_duration_total)  # Store question ranking duration

-
-
+            if model_id in cumulative_model_ranks:
+                cumulative_model_ranks[model_id].append(average_rank)  # Append current iteration's average rank
+
+            if model_id in cumulative_model_ranks and cumulative_model_ranks[model_id]:
+                cumulative_avg_rank[model_id] = np.nanmean([r for r in cumulative_model_ranks[model_id] if not np.isnan(r)])
+            else:
+                cumulative_avg_rank[model_id] = np.nan

             # --- Print and store iteration results IMMEDIATELY after ranking for this model ---
-            ranks_str = "[" + ", ".join(map(str, [ranks
-            custom_print(f"{topic}, {difficulty_mapping[difficulty]}, {model_id}, {cumulative_avg_rank
+            ranks_str = "[" + ", ".join(map(str, [ranks.get(m, None) for m in active_models if m in ranks])) + "]" if ranks else "[]"  # Format ranks for CSV, ensure order
+            custom_print(f"{topic}, {difficulty_mapping[difficulty]}, {model_id}, {cumulative_avg_rank.get(model_id, np.nan):.2f}, {average_rank:.5f}, {ranks_str}, {ranking_duration:.2f} sec")

             # Write iteration results to file (append mode) - write for each model right after ranking
-            iteration_results_file_opened
+            if iteration_results_file_opened:
+                try:
+                    iteration_results_file_opened.write(f"{iteration+1},{topic}, {difficulty_mapping[difficulty]},{question_avg_rank:.2f},{question_ranking_duration_total:.2f},{model_id},{cumulative_avg_rank.get(model_id, np.nan):.2f},{average_rank:.2f},{ranks_str},{ranking_duration:.2f}\n")
+                except Exception as e:
+                    log_debug(f"Error writing to results file: {e}")

         # Update model weights based on cumulative average ranks, handling NaNs
         temp_weights = {}
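The cumulative_avg_rank update above filters out NaN entries before calling np.nanmean, and falls back to np.nan when a model has no valid ranks yet. A small worked example of that computation (the rank values are invented):

import numpy as np

cumulative_model_ranks = {"model-a": [4.0, np.nan, 3.0]}
valid = [r for r in cumulative_model_ranks["model-a"] if not np.isnan(r)]  # [4.0, 3.0]
print(np.nanmean(valid))  # 3.5 -- when no valid ranks remain, the diff's else branch stores np.nan instead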
@@ -842,9 +868,11 @@ def run_benchmark(hf_models, topics, difficulties, t, model_config, token=None):
             for m_id in active_models:
                 model_weights[m_id] = 1.0 / len(active_models)

-        iteration_results_file_opened
+        if iteration_results_file_opened:
+            iteration_results_file_opened.close()

     custom_print(f"Unresponsive models during this run: {unresponsive_models}")
+    st.session_state['is_running'] = False
     return results, cumulative_avg_rank, s_t

 def check_model_availability(models, token):
@@ -970,14 +998,14 @@ with tab1:
         st.error("You need at least 2 available models to run the benchmark")

     # Progress bar
-    progress_bar = st.progress(st.session_state
+    progress_bar = st.progress(st.session_state['progress'])

     # Start benchmark button
     if st.sidebar.button("Start Benchmark"):
         # Clear previous outputs
-        st.session_state
-        st.session_state
-        st.session_state
+        st.session_state['main_output'] = []
+        st.session_state['debug_output'] = []
+        st.session_state['progress'] = 0

         if not hf_token:
             st.error("Please enter your Hugging Face API token")
@@ -989,10 +1017,6 @@ with tab1:
         # Setup to capture results for display
         results_container = st.container()

-        # Create a global variable to store intermediate results
-        if 'results_df' not in st.session_state:
-            st.session_state.results_df = pd.DataFrame()
-
         # Run the benchmark
         try:
             # Run benchmark and get results
@@ -1003,19 +1027,19 @@ with tab1:
             )

             # Update progress to complete
-            st.session_state
+            st.session_state['progress'] = 1.0
             progress_bar.progress(1.0)

             # Display results
             if total_successful > 0:
                 results_df = pd.DataFrame(results)
-                st.session_state
+                st.session_state['results_df'] = results_df

                 # Show model rankings
                 st.subheader("Model Rankings")
                 ranking_df = pd.DataFrame({
                     "Model": list(cumulative_avg_rank.keys()),
-                    "Average Rank": [round(r, 2) for r in cumulative_avg_rank.values()]
+                    "Average Rank": [round(r, 2) if not np.isnan(r) else 'N/A' for r in cumulative_avg_rank.values()]
                 })
                 ranking_df = ranking_df.sort_values("Average Rank", ascending=False)
                 st.dataframe(ranking_df)
@@ -1039,35 +1063,42 @@ with tab1:
             st.exception(e)

     # Show previous results if available
-    elif 'results_df' in st.session_state and not st.session_state
+    elif 'results_df' in st.session_state and not st.session_state['results_df'].empty:
         st.subheader("Previous Results")
-        st.dataframe(st.session_state
+        st.dataframe(st.session_state['results_df'])

 with tab2:
     # Display main output log
     st.subheader("Execution Log")

     # Display logs
-
-
+    if 'main_output' in st.session_state:
+        log_text = "\n".join(st.session_state['main_output'])
+        st.text_area("Progress Log", log_text, height=400)
+    else:
+        st.text_area("Progress Log", "No progress logs yet.", height=400)

     # Add a refresh button for the log
     if st.button("Refresh Progress Log"):
-
+        st.experimental_rerun()

 with tab3:
     # Display debug output
     st.subheader("Debug Log")

     # Display debug logs
-
-
+    if 'debug_output' in st.session_state:
+        debug_text = "\n".join(st.session_state['debug_output'])
+        st.text_area("Debug Information", debug_text, height=400)
+    else:
+        st.text_area("Debug Information", "No debug logs yet.", height=400)

     # Add a refresh button for the debug log
     if st.button("Refresh Debug Log"):
-
-
-    # Auto-
-    if st.session_state.get('
-
-
+        st.experimental_rerun()
+
+# Auto-update while benchmark is running
+if st.session_state.get('is_running', False):
+    st.empty()
+    time.sleep(5)  # Update every 5 seconds while running
+    st.rerun()
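The block added at the end keeps the page refreshing while a benchmark is in flight: as long as the is_running flag set by run_benchmark() is True, the script sleeps for five seconds and calls st.rerun(), which re-executes the whole page and re-renders the logs held in session state (the refresh buttons above use the older st.experimental_rerun() name for the same control flow). A stripped-down sketch of that polling pattern, with a checkbox standing in for the is_running flag:

import time
import streamlit as st

if "tick" not in st.session_state:
    st.session_state["tick"] = 0
st.session_state["tick"] += 1
st.write(f"Script has run {st.session_state['tick']} times")

# While the flag is on, wait and then trigger another full rerun of the script.
if st.checkbox("Auto-refresh every 5 seconds"):
    time.sleep(5)
    st.rerun()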