Spaces:
Paused
Paused
# coding=utf-8 | |
import json | |
def build_preference_dataset(examples, threshold=5.0):
    """Turn scored multi-response examples into (better, worse) pairs.

    Each example is read through the keys ``"user_input"`` and
    ``"responses_and_scores"``; every entry of the latter is read through
    ``"score"`` (converted with ``float``) and ``"response"``.

    For each example the responses are ordered by ``(score, response)``.
    Empty responses at the low-score end are discarded.  If the lowest
    remaining score is strictly below ``threshold`` and the highest is
    strictly above it, one record is emitted whose ``"output"`` is
    ``[highest_scored_response, lowest_scored_response]``.

    Args:
        examples: Iterable of example dicts as described above.
        threshold: Score that must strictly separate the two chosen
            responses.  Defaults to ``5.0``.

    Returns:
        A list of dicts with keys ``"instruction"``, ``"input"`` (always
        the empty string) and ``"output"``.
    """
    dataset = []
    for example in examples:
        scored = sorted(
            (float(resp["score"]), resp["response"])
            for resp in example["responses_and_scores"]
        )

        # Strip empty responses from the low end.  The list must be checked
        # for emptiness *before* indexing: an example whose responses are
        # all empty (or that has none at all) would otherwise raise
        # IndexError instead of being skipped.
        while scored and not scored[0][1]:
            scored.pop(0)
        if not scored:
            continue

        min_score, worst = scored[0]
        max_score, best = scored[-1]
        if min_score < threshold < max_score:
            dataset.append({
                "instruction": example["user_input"],
                "input": "",
                "output": [best, worst],
            })
    return dataset


def main():
    """Read ``comparison_data_v2.json`` and write the filtered pairs.

    The result is written to ``comparison_gpt4_data_en.json`` as indented
    UTF-8 JSON with ``\\n`` line endings and non-ASCII characters preserved.
    """
    with open("comparison_data_v2.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    dataset = build_preference_dataset(data)

    with open("comparison_gpt4_data_en.json", "w", encoding="utf-8", newline="\n") as f:
        json.dump(dataset, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    main()