Spaces:
Running
Running
| import json | |
| import os | |
| import glob | |
| import argparse | |
| import csv | |
def chatgpt_json(merge_file):
    """Score a merged prediction payload against the reference answers.

    Args:
        merge_file: UTF-8 encoded bytes holding a mapping shaped like
            ``{dataset_name: {question_id: {"output_chatgpt_choice": ...}}}``.
            Both JSON text and a Python dict literal are accepted.

    Returns:
        A dict mapping each dataset name found in the payload to its accuracy
        as a percentage rounded to two decimals. A dataset with no entries
        scores ``0.0``.

    Raises:
        ValueError: If the payload is neither valid JSON nor a Python literal.
        KeyError: If a dataset or question id from the payload is absent from
            ``file/ANSWER.json``, or an entry lacks ``output_chatgpt_choice``.
    """
    import ast  # local import: only needed for the literal fallback below

    merge_text = merge_file.decode("utf-8")
    # SECURITY: this payload is externally supplied and used to be passed to
    # eval(), which executes arbitrary code and also rejects plain JSON that
    # contains true/false/null. Parse it as data only: JSON first, then a
    # Python literal for payloads written with repr()/str() of a dict.
    try:
        merge_data = json.loads(merge_text)
    except json.JSONDecodeError:
        try:
            merge_data = ast.literal_eval(merge_text)
        except (ValueError, SyntaxError) as err:
            raise ValueError(
                "merge file is neither valid JSON nor a Python literal"
            ) from err

    correct_answer_file = 'file/ANSWER.json'
    with open(correct_answer_file, 'r', encoding='utf-8') as f:
        correct_answer_data = json.load(f)

    dataset_scores_dict = {}
    for dataset_name, item in merge_data.items():
        total_nums = len(item)
        if total_nums == 0:
            # Nothing was answered for this dataset; avoid dividing by zero.
            dataset_scores_dict[dataset_name] = 0.0
            continue
        reference = correct_answer_data[dataset_name]
        correct = sum(
            1
            for question_id, sub_item in item.items()
            if sub_item['output_chatgpt_choice'] == reference[question_id]['answer']
        )
        dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
    return dataset_scores_dict
def compute_scores(merge_file):
    """Build the two-row leaderboard table for one submission.

    The per-dataset accuracies returned by ``chatgpt_json(merge_file)`` are
    combined into three weighted category averages (video-exclusive
    understanding, prior-knowledge QA, comprehension and decision-making) and
    an overall score that is the plain mean of those three.

    Args:
        merge_file: Passed through unchanged to ``chatgpt_json``.

    Returns:
        ``[header, values]``: a list of column titles followed by a list with
        the overall score, the three category scores and the thirteen
        per-dataset scores, in matching order.

    Raises:
        KeyError: If any of the thirteen expected datasets has no score.
    """
    scores = chatgpt_json(merge_file)

    # Per-category dataset weights; each category is normalised by its own sum.
    category_weights = {
        1: {
            "ActivityNet": 1,
            "MSVD": 1,
            "MSRVTT": 1,
            "TGIF": 1,
            "Youcook2": 1,
            "Ucfcrime": 1,
            "MOT": 0.5,
        },
        2: {
            "TVQA": 1,
            "MV": 1,
            "NBA": 1,
        },
        3: {
            "Driving-exam": 0.5,
            "Driving-decision-making": 1,
            "SQA3D": 1,
        },
    }

    def weighted_mean(weights):
        """Return the weighted average of the dataset scores named in *weights*."""
        denominator = sum(weights.values())
        return sum(
            weight * scores[name] / denominator
            for name, weight in weights.items()
        )

    video_exclusive = weighted_mean(category_weights[1])   # Video-exclusive Understanding
    prior_knowledge = weighted_mean(category_weights[2])   # Prior Knowledge-based Question-answer
    decision_making = weighted_mean(category_weights[3])   # Comprehension and Decision-making
    overall = (video_exclusive + prior_knowledge + decision_making) / 3

    # Keep the aggregate entries on the score dict alongside the raw scores.
    scores['Exclusive_understanding'] = video_exclusive
    scores['Prior_Knowledge'] = prior_knowledge
    scores['Comprehension_and_Decision-making'] = decision_making
    scores['final_score'] = overall

    dataset_columns = [
        "ActivityNet", "MSVD", "MSRVTT", "TGIF", "Youcook2", "Ucfcrime",
        "MOT", "TVQA", "MV", "NBA", "Driving-exam", "Driving-decision-making",
        "SQA3D",
    ]
    header = [
        "Avg. All",
        "Avg. Video-Exclusive",
        "Avg. Prior-Knowledge QA",
        "Avg. Decision-Making",
    ] + dataset_columns
    values = [overall, video_exclusive, prior_knowledge, decision_making]
    values.extend(scores[name] for name in dataset_columns)
    return [header, values]