add debug info
Files changed:
- backend-cli.py +1 -0
- src/display/utils.py +4 -2
- src/leaderboard/read_evals.py +3 -0
- src/utils.py +3 -3
backend-cli.py CHANGED

```diff
@@ -172,6 +172,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[in
         results['results'][task_name][f"{key},none"] = value

     results['results'][task_name]['batch_size,none'] = batch_size
+    results['results'][task_name]['precision,none'] = eval_request.precision
     print(f"gpu_stats_list: {gpu_stats_list}")
     print("GPU Usage:", gpu_info)

```
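The new line threads the requested precision into the per-task results payload, right next to the existing `batch_size,none` entry. As a rough illustration (the `metric,none` key convention is taken from the diff; all values below are made-up placeholders, not real evaluation output), the payload for one task then looks like:

```python
# Illustrative shape of the per-task results payload after this change.
# The "metric,none" key style comes from the diff above; the task name and
# all values are invented for the example.
results = {
    "results": {
        "mmlu": {
            "acc,none": 0.63,              # pre-existing metric entry
            "batch_size,none": 8,          # written just above the new line
            "precision,none": "bfloat16",  # new: copied from eval_request.precision
        }
    }
}
```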
src/display/utils.py CHANGED

```diff
@@ -15,11 +15,11 @@ MULTIPLE_CHOICEs = ["mmlu"]

 GPU_TEMP = 'Temp(C)'
 GPU_Power = 'Power(W)'
-GPU_Mem = 'Mem(M)'
+GPU_Mem = 'Mem(G)'
 GPU_Name = "GPU"
 GPU_Util = 'Util(%)'
 BATCH_SIZE = 'bs'
-
+PRECISION = "Precision"
 system_metrics_to_name_map = {
     "end_to_end_time": f"{E2Es}",
     "prefilling_time": f"{PREs}",
@@ -32,6 +32,7 @@ gpu_metrics_to_name_map = {
     GPU_Power: GPU_Power,
     GPU_Mem: GPU_Mem,
     "batch_size": BATCH_SIZE,
+    "precision": PRECISION,
     GPU_Name: GPU_Name,
 }

@@ -105,6 +106,7 @@ for task in Tasks:
     # System performance metrics
     auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
     auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
+    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True)])
     auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
     auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
```
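Here, `PRECISION` joins the GPU metric labels, the memory label switches from M to G to match the unit conversion in src/utils.py below, and `gpu_metrics_to_name_map` gains a `"precision"` entry so the raw key emitted by the backend can be renamed to its display label; the dedicated leaderboard column itself stays commented out for now. A minimal sketch of how such a key-renaming map is typically applied (the `rename_gpu_metrics` helper is hypothetical, not from this repo; the map entries mirror the diff):

```python
# Hypothetical helper showing how a metrics-to-name map like the one above
# can be applied to a raw stats dict before display. Illustrative only.
gpu_metrics_to_name_map = {
    "Mem(G)": "Mem(G)",
    "batch_size": "bs",
    "precision": "Precision",
    "GPU": "GPU",
}

def rename_gpu_metrics(raw: dict) -> dict:
    """Rename raw metric keys to display labels, dropping unmapped keys."""
    return {gpu_metrics_to_name_map[k]: v
            for k, v in raw.items() if k in gpu_metrics_to_name_map}

print(rename_gpu_metrics({"batch_size": 8, "precision": "bfloat16", "Mem(G)": 39.5}))
# -> {'bs': 8, 'Precision': 'bfloat16', 'Mem(G)': 39.5}
```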
src/leaderboard/read_evals.py CHANGED

```diff
@@ -106,6 +106,9 @@ class EvalResult:
        if "GPU" in metric:
            results[benchmark][metric] = value
            continue
+       if "precision" in metric:
+           results[benchmark][metric] = value
+           continue

        if "rouge" in metric and "truthful" not in benchmark:
            multiplier = 1.0
```
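The added branch mirrors the `GPU` branch just above it: precision is a string (e.g. `bfloat16`), so it has to be stored verbatim and skipped with `continue` before the multiplier logic below scales the numeric metrics. A stripped-down sketch of the dispatch pattern (not the repo's actual `EvalResult` code; the 100.0 multiplier is an assumed example value):

```python
# Sketch of the dispatch pattern above, not the actual EvalResult logic.
# String-valued metrics must bypass the numeric scaling, or the float math
# would raise TypeError on a value like "bfloat16".
results = {"mmlu": {}}
multiplier = 100.0  # assumed for illustration
for metric, value in [("acc,none", 0.63), ("precision,none", "bfloat16")]:
    if "GPU" in metric or "precision" in metric:
        results["mmlu"][metric] = value  # store GPU stats / precision verbatim
        continue
    results["mmlu"][metric] = value * multiplier  # numeric metrics get scaled

print(results)  # {'mmlu': {'acc,none': 63.0, 'precision,none': 'bfloat16'}}
```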
src/utils.py CHANGED

```diff
@@ -74,13 +74,13 @@ def parse_nvidia_smi():
         gpu_info.update({
             GPU_TEMP: temp,
             GPU_Power: power_usage,
-            GPU_Mem: mem_usage,
+            GPU_Mem: round(mem_usage / 1024, 2),
             GPU_Util: gpu_util
         })

         if len(gpu_info) >= 4:
             gpu_stats.append(gpu_info)
-    print(f"
+    print(f"gpu_stats: {gpu_stats}")
     gpu_name = f"{len(gpu_stats)}x{gpu_name}"
     gpu_stats_total = {
         GPU_TEMP: 0,
@@ -94,7 +94,7 @@
         gpu_stats_total[GPU_Power] += gpu_stat[GPU_Power]
         gpu_stats_total[GPU_Mem] += gpu_stat[GPU_Mem]
         gpu_stats_total[GPU_Util] += gpu_stat[GPU_Util]
-
+    gpu_stats_total[GPU_Mem] = gpu_stats_total[GPU_Mem]  # G
     gpu_stats_total[GPU_TEMP] /= len(gpu_stats)
     gpu_stats_total[GPU_Power] /= len(gpu_stats)
     gpu_stats_total[GPU_Util] /= len(gpu_stats)
```
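Per-GPU memory is now converted to GB at collection time (assuming `mem_usage` is the MiB figure nvidia-smi reports), and the totals loop averages temperature, power, and utilization across GPUs while leaving memory summed; the new `gpu_stats_total[GPU_Mem] = gpu_stats_total[GPU_Mem]  # G` line is a self-assignment that only documents the unit, since each per-GPU entry is already in GB. A self-contained sketch of the same aggregation, using nvidia-smi's CSV query mode rather than the repo's parser:

```python
# Self-contained sketch (not the repo's parser): query nvidia-smi in CSV
# mode, convert memory from MiB to GB as the diff does, then average
# temperature/power/utilization across GPUs while summing memory.
import subprocess

def total_gpu_stats() -> dict:
    out = subprocess.check_output(
        ["nvidia-smi",
         "--query-gpu=temperature.gpu,power.draw,memory.used,utilization.gpu",
         "--format=csv,noheader,nounits"],
        text=True,
    )
    stats = []
    for line in out.strip().splitlines():
        temp, power, mem_mib, util = (float(x) for x in line.split(","))
        stats.append({"Temp(C)": temp, "Power(W)": power,
                      "Mem(G)": round(mem_mib / 1024, 2),  # MiB -> GB, as in the diff
                      "Util(%)": util})
    total = {key: sum(s[key] for s in stats) for key in stats[0]}
    for key in ("Temp(C)", "Power(W)", "Util(%)"):  # memory stays a sum
        total[key] /= len(stats)
    return total
```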