Spaces:
Sleeping
Sleeping
implemented histogram
Browse files- app.py +149 -10
- result.txt +1 -1
app.py
CHANGED
|
@@ -136,27 +136,47 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
| 136 |
# Helper function to evaluate task attempts
|
| 137 |
def evaluate_tasks(fields, tasks):
|
| 138 |
task_status = {}
|
|
|
|
|
|
|
|
|
|
| 139 |
for task in tasks:
|
|
|
|
| 140 |
relevant_attempts = [f for f in fields if task in f]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
if any("OK" in attempt for attempt in relevant_attempts):
|
| 142 |
task_status[task] = "Attempted (Successful)"
|
|
|
|
| 143 |
elif any("ERROR" in attempt for attempt in relevant_attempts):
|
| 144 |
task_status[task] = "Attempted (Error)"
|
| 145 |
elif any("JIT" in attempt for attempt in relevant_attempts):
|
| 146 |
task_status[task] = "Attempted (JIT)"
|
| 147 |
else:
|
| 148 |
task_status[task] = "Unattempted"
|
| 149 |
-
return task_status
|
| 150 |
|
| 151 |
# Evaluate tasks for each category
|
| 152 |
-
optional_task_1_status = evaluate_tasks(fields, optional_task_1_subtasks)
|
| 153 |
-
optional_task_2_status = evaluate_tasks(fields, optional_task_2_subtasks)
|
| 154 |
|
| 155 |
# Check if tasks have any successful attempt
|
| 156 |
opt1_done = any(status == "Attempted (Successful)" for status in optional_task_1_status.values())
|
| 157 |
opt2_done = any(status == "Attempted (Successful)" for status in optional_task_2_status.values())
|
| 158 |
|
| 159 |
-
return opt1_done, opt2_done
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
# Read data from test_info.txt
|
| 162 |
with open(test_info_location, "r") as file:
|
|
@@ -170,16 +190,40 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
| 170 |
1: {"ER": 0, "ME": 0, "both": 0,"none":0},
|
| 171 |
2: {"ER": 0, "ME": 0, "both": 0,"none":0}
|
| 172 |
}
|
| 173 |
-
|
| 174 |
# Analyze rows
|
|
|
|
|
|
|
|
|
|
| 175 |
for i, row in enumerate(data):
|
| 176 |
row = row.strip()
|
| 177 |
if not row:
|
| 178 |
continue
|
| 179 |
|
| 180 |
ideal_task = ideal_tasks[i] # Get the ideal task for the current row
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
if ideal_task == 0:
|
| 184 |
if opt1_done and not opt2_done:
|
| 185 |
task_counts[1]["ER"] += 1
|
|
@@ -198,6 +242,16 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
| 198 |
task_counts[2]["both"] += 1
|
| 199 |
else:
|
| 200 |
task_counts[2]["none"] +=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
# Create a string output for results
|
| 203 |
# output_summary = "Task Analysis Summary:\n"
|
|
@@ -209,9 +263,90 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
| 209 |
# output_summary += f" Only OptionalTask_2 done: {counts['ME']}\n"
|
| 210 |
# output_summary += f" Both done: {counts['both']}\n"
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
# colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
|
| 213 |
colors = ["#FF6F61", "#6B5B95", "#88B04B", "#F7CAC9"]
|
| 214 |
-
|
| 215 |
# Generate pie chart for Task 1
|
| 216 |
task1_labels = list(task_counts[1].keys())
|
| 217 |
task1_values = list(task_counts[1].values())
|
|
@@ -419,7 +554,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
| 419 |
ROC-AUC for problems of type ER: {opt_task1_roc_auc:.4f}
|
| 420 |
ROC-AUC for problems of type ME: {opt_task2_roc_auc:.4f}
|
| 421 |
"""
|
| 422 |
-
return text_output,fig,fig_task1,fig_task2
|
| 423 |
|
| 424 |
# List of models for the dropdown menu
|
| 425 |
|
|
@@ -818,13 +953,17 @@ with gr.Blocks(theme='gstaff/sketch', css=custom_css) as demo:
|
|
| 818 |
opt1_pie = gr.Plot(label="ER")
|
| 819 |
opt2_pie = gr.Plot(label="ME")
|
| 820 |
# output_summary = gr.Textbox(label="Summary")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 821 |
|
| 822 |
|
| 823 |
|
| 824 |
btn.click(
|
| 825 |
fn=process_file,
|
| 826 |
inputs=[model_dropdown,increment_slider],
|
| 827 |
-
outputs=[output_text,plot_output,opt1_pie,opt2_pie]
|
| 828 |
)
|
| 829 |
|
| 830 |
|
|
|
|
| 136 |
# Helper function to evaluate task attempts
|
| 137 |
def evaluate_tasks(fields, tasks):
|
| 138 |
task_status = {}
|
| 139 |
+
total_attempts = 0 # Counter for total number of attempts
|
| 140 |
+
attempted_tasks = 0 # Counter for tasks attempted at least once
|
| 141 |
+
successful_first_attempts = 0 # Counter for tasks successful on the first try
|
| 142 |
for task in tasks:
|
| 143 |
+
|
| 144 |
relevant_attempts = [f for f in fields if task in f]
|
| 145 |
+
attempt_count = len(relevant_attempts)
|
| 146 |
+
total_attempts += attempt_count # Add to the total attempts
|
| 147 |
+
|
| 148 |
+
if attempt_count > 0:
|
| 149 |
+
attempted_tasks += 1 # Increment attempted tasks count
|
| 150 |
+
|
| 151 |
+
# Check the first attempt
|
| 152 |
+
first_attempt = relevant_attempts[0]
|
| 153 |
+
if "OK" in first_attempt and "ERROR" not in first_attempt and "JIT" not in first_attempt:
|
| 154 |
+
successful_first_attempts += 1
|
| 155 |
+
|
| 156 |
+
|
| 157 |
if any("OK" in attempt for attempt in relevant_attempts):
|
| 158 |
task_status[task] = "Attempted (Successful)"
|
| 159 |
+
|
| 160 |
elif any("ERROR" in attempt for attempt in relevant_attempts):
|
| 161 |
task_status[task] = "Attempted (Error)"
|
| 162 |
elif any("JIT" in attempt for attempt in relevant_attempts):
|
| 163 |
task_status[task] = "Attempted (JIT)"
|
| 164 |
else:
|
| 165 |
task_status[task] = "Unattempted"
|
| 166 |
+
return task_status,attempted_tasks, total_attempts,successful_first_attempts
|
| 167 |
|
| 168 |
# Evaluate tasks for each category
|
| 169 |
+
optional_task_1_status, opt1_attempted, opt1_total_attempts, opt1_successful_first_attempts = evaluate_tasks(fields, optional_task_1_subtasks)
|
| 170 |
+
optional_task_2_status, opt2_attempted, opt2_total_attempts, opt2_successful_first_attempts = evaluate_tasks(fields, optional_task_2_subtasks)
|
| 171 |
|
| 172 |
# Check if tasks have any successful attempt
|
| 173 |
opt1_done = any(status == "Attempted (Successful)" for status in optional_task_1_status.values())
|
| 174 |
opt2_done = any(status == "Attempted (Successful)" for status in optional_task_2_status.values())
|
| 175 |
|
| 176 |
+
return (opt1_done, opt2_done,
|
| 177 |
+
opt1_attempted, opt2_attempted,
|
| 178 |
+
opt1_total_attempts, opt2_total_attempts,
|
| 179 |
+
opt1_successful_first_attempts, opt2_successful_first_attempts)
|
| 180 |
|
| 181 |
# Read data from test_info.txt
|
| 182 |
with open(test_info_location, "r") as file:
|
|
|
|
| 190 |
1: {"ER": 0, "ME": 0, "both": 0,"none":0},
|
| 191 |
2: {"ER": 0, "ME": 0, "both": 0,"none":0}
|
| 192 |
}
|
| 193 |
+
# To store task completion counts per row
|
| 194 |
# Analyze rows
|
| 195 |
+
row_summary = []
|
| 196 |
+
opt1_ratios = []
|
| 197 |
+
opt2_ratios = []
|
| 198 |
for i, row in enumerate(data):
|
| 199 |
row = row.strip()
|
| 200 |
if not row:
|
| 201 |
continue
|
| 202 |
|
| 203 |
ideal_task = ideal_tasks[i] # Get the ideal task for the current row
|
| 204 |
+
(
|
| 205 |
+
opt1_done, opt2_done,
|
| 206 |
+
opt1_attempted, opt2_attempted,
|
| 207 |
+
opt1_total_attempts, opt2_total_attempts,
|
| 208 |
+
opt1_successful_first_attempts, opt2_successful_first_attempts
|
| 209 |
+
) = analyze_row(row)
|
| 210 |
+
|
| 211 |
+
opt1_ratios.append(opt1_attempted / opt1_total_attempts if opt1_total_attempts > 0 else 0)
|
| 212 |
+
opt2_ratios.append(opt2_attempted / opt2_total_attempts if opt2_total_attempts > 0 else 0)
|
| 213 |
+
# Create a summary of tasks attempted, total attempts, and successful first attempts for each row
|
| 214 |
+
# row_summary.append({
|
| 215 |
+
# "row_index": i + 1,
|
| 216 |
+
# "opt1": {
|
| 217 |
+
# "tasks_attempted": opt1_attempted,
|
| 218 |
+
# "total_attempts": opt1_total_attempts,
|
| 219 |
+
# "successful_attempts": opt1_successful_first_attempts,
|
| 220 |
+
# },
|
| 221 |
+
# "opt2": {
|
| 222 |
+
# "tasks_attempted": opt2_attempted,
|
| 223 |
+
# "total_attempts": opt2_total_attempts,
|
| 224 |
+
# "successful_attempts": opt2_successful_first_attempts,
|
| 225 |
+
# }
|
| 226 |
+
# })
|
| 227 |
if ideal_task == 0:
|
| 228 |
if opt1_done and not opt2_done:
|
| 229 |
task_counts[1]["ER"] += 1
|
|
|
|
| 242 |
task_counts[2]["both"] += 1
|
| 243 |
else:
|
| 244 |
task_counts[2]["none"] +=1
|
| 245 |
+
# Print a summary of task completions
|
| 246 |
+
# for summary in row_summary:
|
| 247 |
+
# print(f"\nRow {summary['row_index']}:")
|
| 248 |
+
# print(f" OptionalTask_1 - Tasks Attempted: {summary['opt1']['tasks_attempted']}, "
|
| 249 |
+
# f"Total Attempts: {summary['opt1']['total_attempts']}, "
|
| 250 |
+
# f"Successful Attempts: {summary['opt1']['successful_attempts']}")
|
| 251 |
+
# print(f" OptionalTask_2 - Tasks Attempted: {summary['opt2']['tasks_attempted']}, "
|
| 252 |
+
# f"Total Attempts: {summary['opt2']['total_attempts']}, "
|
| 253 |
+
# f"Successful Attempts: {summary['opt2']['successful_attempts']}")
|
| 254 |
+
|
| 255 |
|
| 256 |
# Create a string output for results
|
| 257 |
# output_summary = "Task Analysis Summary:\n"
|
|
|
|
| 263 |
# output_summary += f" Only OptionalTask_2 done: {counts['ME']}\n"
|
| 264 |
# output_summary += f" Both done: {counts['both']}\n"
|
| 265 |
|
| 266 |
+
|
| 267 |
+
# Create figure
|
| 268 |
+
fig_hist1 = go.Figure()
|
| 269 |
+
|
| 270 |
+
# Add histogram for OptionalTask_1 (ER)
|
| 271 |
+
fig_hist1.add_trace(go.Histogram(
|
| 272 |
+
x=opt1_ratios,
|
| 273 |
+
name="ER",
|
| 274 |
+
marker=dict(color='blue'),
|
| 275 |
+
opacity=1,
|
| 276 |
+
xbins=dict(
|
| 277 |
+
start=0.0,
|
| 278 |
+
end=1.0,
|
| 279 |
+
size=0.1 # Bin width set to 0.1 for 10 bins
|
| 280 |
+
)
|
| 281 |
+
))
|
| 282 |
+
fig_hist2=go.Figure()
|
| 283 |
+
# Add histogram for OptionalTask_2 (ME)
|
| 284 |
+
fig_hist2.add_trace(go.Histogram(
|
| 285 |
+
x=opt2_ratios,
|
| 286 |
+
name="ME",
|
| 287 |
+
marker=dict(color='red'),
|
| 288 |
+
opacity=1,
|
| 289 |
+
xbins=dict(
|
| 290 |
+
start=0.0,
|
| 291 |
+
end=1.0,
|
| 292 |
+
size=0.1 # Bin width set to 0.1 for 10 bins
|
| 293 |
+
)
|
| 294 |
+
))
|
| 295 |
+
|
| 296 |
+
# Update layout
|
| 297 |
+
fig_hist1.update_layout(
|
| 298 |
+
title="ER: Histogram of Task Success Ratios Across Dataset",
|
| 299 |
+
title_x=0.5,
|
| 300 |
+
xaxis=dict(
|
| 301 |
+
title="Success Ratio (Tasks Attempted / Total Attempts)",
|
| 302 |
+
tickmode="array",
|
| 303 |
+
tickvals=np.linspace(0, 1, 11), # 11 evenly spaced ticks (0.0, 0.1, ..., 1.0) marking the 10 bin edges
|
| 304 |
+
),
|
| 305 |
+
yaxis=dict(
|
| 306 |
+
title="Number of Instances"
|
| 307 |
+
),
|
| 308 |
+
font=dict(
|
| 309 |
+
family="sans-serif",
|
| 310 |
+
size=12,
|
| 311 |
+
color="black"
|
| 312 |
+
),
|
| 313 |
+
legend=dict(
|
| 314 |
+
font=dict(
|
| 315 |
+
family="sans-serif",
|
| 316 |
+
size=12,
|
| 317 |
+
color="black"
|
| 318 |
+
)
|
| 319 |
+
),
|
| 320 |
+
barmode='overlay' # Overlapping bars
|
| 321 |
+
)
|
| 322 |
+
fig_hist2.update_layout(
|
| 323 |
+
title="ME: Histogram of Task Success Ratios Across Dataset",
|
| 324 |
+
title_x=0.5,
|
| 325 |
+
xaxis=dict(
|
| 326 |
+
title="Success Ratio (Tasks Attempted / Total Attempts)",
|
| 327 |
+
tickmode="array",
|
| 328 |
+
tickvals=np.linspace(0, 1, 11), # 11 evenly spaced ticks (0.0, 0.1, ..., 1.0) marking the 10 bin edges
|
| 329 |
+
),
|
| 330 |
+
yaxis=dict(
|
| 331 |
+
title="Number of Instances"
|
| 332 |
+
),
|
| 333 |
+
font=dict(
|
| 334 |
+
family="sans-serif",
|
| 335 |
+
size=12,
|
| 336 |
+
color="black"
|
| 337 |
+
),
|
| 338 |
+
legend=dict(
|
| 339 |
+
font=dict(
|
| 340 |
+
family="sans-serif",
|
| 341 |
+
size=12,
|
| 342 |
+
color="black"
|
| 343 |
+
)
|
| 344 |
+
),
|
| 345 |
+
barmode='overlay' # Overlapping bars
|
| 346 |
+
)
|
| 347 |
# colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
|
| 348 |
colors = ["#FF6F61", "#6B5B95", "#88B04B", "#F7CAC9"]
|
| 349 |
+
# print(opt1_ratios,opt2_ratios)
|
| 350 |
# Generate pie chart for Task 1
|
| 351 |
task1_labels = list(task_counts[1].keys())
|
| 352 |
task1_values = list(task_counts[1].values())
|
|
|
|
| 554 |
ROC-AUC for problems of type ER: {opt_task1_roc_auc:.4f}
|
| 555 |
ROC-AUC for problems of type ME: {opt_task2_roc_auc:.4f}
|
| 556 |
"""
|
| 557 |
+
return text_output,fig,fig_task1,fig_task2,fig_hist1,fig_hist2
|
| 558 |
|
| 559 |
# List of models for the dropdown menu
|
| 560 |
|
|
|
|
| 953 |
opt1_pie = gr.Plot(label="ER")
|
| 954 |
opt2_pie = gr.Plot(label="ME")
|
| 955 |
# output_summary = gr.Textbox(label="Summary")
|
| 956 |
+
with gr.Row():
|
| 957 |
+
histo1 = gr.Plot(label="Hist")
|
| 958 |
+
histo2 = gr.Plot(label="Hist")
|
| 959 |
+
|
| 960 |
|
| 961 |
|
| 962 |
|
| 963 |
btn.click(
|
| 964 |
fn=process_file,
|
| 965 |
inputs=[model_dropdown,increment_slider],
|
| 966 |
+
outputs=[output_text,plot_output,opt1_pie,opt2_pie,histo1,histo2]
|
| 967 |
)
|
| 968 |
|
| 969 |
|
result.txt
CHANGED
|
@@ -3,5 +3,5 @@ total_acc: 69.00702106318957
|
|
| 3 |
precisions: 0.7236623191454734
|
| 4 |
recalls: 0.6900702106318957
|
| 5 |
f1_scores: 0.6802420656474512
|
| 6 |
-
time_taken_from_start:
|
| 7 |
auc_score: 0.7457100293916334
|
|
|
|
| 3 |
precisions: 0.7236623191454734
|
| 4 |
recalls: 0.6900702106318957
|
| 5 |
f1_scores: 0.6802420656474512
|
| 6 |
+
time_taken_from_start: 23.562122583389282
|
| 7 |
auc_score: 0.7457100293916334
|