Spaces:
Sleeping
Sleeping
| import csv | |
| import pickle | |
| ### NOTICE: the csv only accepts two-column input, but input may be supplied multiple times. | |
| # 1_2_3, 1 is action, 2 is supply object, 3 is accept object | |
def update_dict_csv(term_dict, f):
    """Merge two-column CSV rows from *f* into *term_dict* in place.

    Column 0 of each row is lower-cased and used as the key; column 1 is
    added to that key's list unless the list already contains it. Because
    duplicates are ignored, the same CSV can be applied more than once and
    a term may appear on several rows.

    Args:
        term_dict: Dict mapping a lower-cased term to a list of column-1
            values. It is modified in place.
        f: Iterable of CSV lines, e.g. a text file opened with newline=''.

    Raises:
        IndexError: If a non-blank row has fewer than two columns.
    """
    for row in csv.reader(f):
        # csv.reader yields [] for blank lines (e.g. a trailing empty line);
        # skip them instead of crashing on row[0].
        if not row:
            continue
        word = row[0].lower()
        # Read column 1 before touching the dict so a short row cannot
        # leave behind a key with an empty list.
        translation = row[1]
        translations = term_dict.setdefault(word, [])
        if translation not in translations:
            translations.append(translation)
def export_dict_csv(term_dict, f):
    """Write *term_dict* to *f* as two-column ``term,translation`` CSV rows.

    One row is written per translation, so a term with several translations
    appears on several rows. This is the same layout update_dict_csv reads,
    so an exported file can be loaded back. (Previously the whole list was
    written as its Python repr in a single cell, which could not be
    re-imported.)

    Args:
        term_dict: Dict mapping a term to a list of translations. A bare
            string value is treated as a single translation.
        f: Writable text file object, ideally opened with newline=''.
    """
    # One writer for the whole export instead of a new one per row.
    writer = csv.writer(f)
    for key, val in term_dict.items():
        # Guard against iterating a plain string character by character.
        translations = [val] if isinstance(val, str) else val
        writer.writerows([key, item] for item in translations)
def save_dict_pickle(term_dict, f):
    """Pickle *term_dict* into the binary file object *f*.

    Uses pickle.HIGHEST_PROTOCOL, i.e. the newest pickle protocol the
    running interpreter supports.
    """
    pickler = pickle.Pickler(f, pickle.HIGHEST_PROTOCOL)
    pickler.dump(term_dict)
def update_csv_pickle(pickle_f, csv_f):
    """Merge CSV rows into a pickled term dictionary and rewrite it in place.

    Loads the dictionary from *pickle_f*, merges the rows of *csv_f* into it
    with update_dict_csv, and writes the result back to *pickle_f*,
    replacing the old contents.

    Args:
        pickle_f: Seekable binary file object open for both reading and
            writing (e.g. mode 'r+b') that holds a pickled dict.
        csv_f: Iterable of two-column CSV lines, e.g. an open text file.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on pickle files you created yourself.
    term_dict = pickle.load(pickle_f)
    update_dict_csv(term_dict, csv_f)
    # Reading moved the file position past the start. Rewind so the new
    # pickle overwrites the old one instead of being appended after it,
    # where a later pickle.load would never reach it.
    pickle_f.seek(0)
    # Save to the pickle file with the highest protocol for better performance.
    pickle.dump(term_dict, pickle_f, pickle.HIGHEST_PROTOCOL)
    # Drop any leftover bytes if the new pickle is shorter than the old one.
    pickle_f.truncate()
# Demo: build a term dictionary from the en-zh CSV, then export it again.
term_dict_sc2 = {}
# newline='' is what the csv module expects for files it reads or writes:
# it lets csv handle line endings itself, so newlines embedded in quoted
# fields are read correctly and no extra '\r' is added on write on platforms
# that use '\r\n' line endings.
with open("./finetune_data/dict_enzh.csv", 'r', encoding='utf-8', newline='') as f:
    update_dict_csv(term_dict_sc2, f)
with open("../test.csv", "w", encoding='utf-8', newline='') as w:
    export_dict_csv(term_dict_sc2, w)
| ## To load the pickle file, just call: | |
| # pickle.load(f) |