import pandas as pd
import json
import numpy as np
import warnings

# Suppress SettingWithCopyWarning. The lookup is guarded so that importing this
# module does not fail on a pandas build that does not expose the class.
_SETTING_WITH_COPY_WARNING = getattr(pd.errors, 'SettingWithCopyWarning', None)
if _SETTING_WITH_COPY_WARNING is not None:
    warnings.simplefilter(action='ignore', category=_SETTING_WITH_COPY_WARNING)

# Country name (after "European Union" has been rewritten to "EU") -> prefix
# used in narrative codes and result keys. Insertion order fixes the key order
# of the returned dictionaries.
_COUNTRY_CODES = {'China': 'CH', 'Russia': 'RU', 'EU': 'EU', 'USA': 'US'}

# Number of narratives per country (label columns N1..N6).
_NARRATIVE_COUNT = 6


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _entry(precision, recall, matrix):
    """Build one result entry (scores + raw confusion matrix as nested lists)."""
    total = precision + recall
    f1 = (2 * precision * recall) / total if total else 0.0
    return {
        'scores': {'precision': precision, 'recall': recall, 'f1-score': f1},
        'raw_data': matrix.tolist(),
    }


def _record(results, name, matrix):
    """Store the strict and lenient entries for one 2x3 confusion matrix."""
    # Row 0 = predicted 'yes', row 1 = predicted 'no'.
    # Column 0 = gold 'yes', column 1 = gold other value, column 2 = gold 'no'.
    tp, yl, fp = (int(value) for value in matrix[0])
    fn = int(matrix[1, 0])

    # Strict: a predicted 'yes' on a middle-column gold label counts against
    # precision and is ignored by recall.
    results['strict'][name] = _entry(
        _ratio(tp, tp + fp + yl),
        _ratio(tp, tp + fn),
        matrix,
    )
    # Lenient: a predicted 'yes' on a middle-column gold label counts as a hit.
    results['lenient'][name] = _entry(
        _ratio(tp + yl, tp + yl + fp),
        _ratio(tp + yl, tp + fn + yl),
        matrix,
    )


def _gold_rows(gold, lang, cols):
    """Return (id, country, labels) for each de-duplicated gold row of *lang*."""
    frame = pd.DataFrame(gold)
    if frame.empty:
        return []
    # assign() builds a new frame, so the caller's data is never modified.
    frame = frame.assign(
        country=frame['country'].replace('European Union', 'EU')
    )
    frame = frame.drop_duplicates(subset=['id', 'lang'], keep='last')
    frame = frame[frame['lang'] == lang]
    return list(zip(frame['id'], frame['country'], frame[cols].values.tolist()))


def _prediction_lookup(test, lang):
    """Map id -> (country, predicted matrix rows) for the predictions of *lang*."""
    frame = pd.DataFrame(test)
    if frame.empty:
        return {}
    frame = frame.assign(
        country=frame['country'].replace('European Union', 'EU')
    )
    frame = frame.drop_duplicates(subset=['id', 'language'], keep='last')
    frame = frame[frame['language'] == lang]

    lookup = {}
    rows = zip(frame['id'], frame['country'], frame['narratives'])
    for tweet_id, country, narratives in rows:
        code = country[:2].upper()  # country code ('RU', 'CH', etc.)
        # Matrix row per narrative: 0 when the code is listed ('yes'), else 1.
        predicted = [
            0 if f"{code}{i}" in narratives else 1
            for i in range(1, _NARRATIVE_COUNT + 1)
        ]
        # Keep the first row per id, as a positional "first match" would.
        lookup.setdefault(tweet_id, (country, predicted))
    return lookup


def evaluate_results(lang, gold, test):
    """Score narrative predictions against gold labels for one language.

    Args:
        lang: Language to evaluate. Compared with the gold ``lang`` column and
            the test ``language`` column.
        gold: Anything ``pd.DataFrame`` accepts, with the columns ``id``,
            ``lang``, ``country`` and ``N1``..``N6``. A label of ``'yes'`` or
            ``'no'`` goes to the outer matrix columns; any other value goes to
            the middle column.
        test: Anything ``pd.DataFrame`` accepts, with the columns ``id``,
            ``language``, ``country`` and ``narratives``. A narrative ``Nk`` is
            predicted ``'yes'`` when ``<country code><k>`` (e.g. ``CH1``) is
            contained in ``narratives``, and ``'no'`` otherwise.

    In both inputs ``"European Union"`` is treated as ``"EU"`` and duplicated
    (id, language) pairs keep only their last row. Gold rows that have no
    prediction are skipped. Empty inputs produce all-zero results.

    Per-narrative and per-country results only use rows whose gold and
    predicted country agree and belong to China, Russia, EU or USA. The global
    ``micro`` result uses every gold row of the language that has a prediction
    with the same id, whatever the country.

    Returns:
        A dictionary laid out as follows::

            strict
                narrative_country (e.g. CH1)
                    scores: precision, recall, f1-score
                    raw_data
                country_micro (e.g. CH_micro)
                    scores: precision, recall, f1-score
                    raw_data
                micro (global micro)
                    scores: precision, recall, f1-score
                    raw_data
            lenient
                (same keys as strict)

        ``raw_data`` is a 2x3 confusion matrix as nested lists: rows are the
        prediction (yes, no), columns are the gold label (yes, other, no).
        Scores are Python floats, 0.0 when a denominator is zero.
    """
    cols = [f'N{i}' for i in range(1, _NARRATIVE_COUNT + 1)]
    predictions = _prediction_lookup(test, lang)

    # One 2x3 matrix per narrative and country, plus one global matrix.
    per_country = {
        code: np.zeros((_NARRATIVE_COUNT, 2, 3), dtype=int)
        for code in _COUNTRY_CODES.values()
    }
    overall = np.zeros((2, 3), dtype=int)

    for tweet_id, country, labels in _gold_rows(gold, lang, cols):
        match = predictions.get(tweet_id)
        if match is None:
            # No prediction for this gold row: leave it out of every matrix.
            continue
        predicted_country, predicted = match
        gold_columns = [
            0 if label == 'yes' else 2 if label == 'no' else 1
            for label in labels
        ]
        code = _COUNTRY_CODES.get(country)
        same_country = code is not None and predicted_country == country
        for k, (p, r) in enumerate(zip(predicted, gold_columns)):
            overall[p, r] += 1
            if same_country:
                per_country[code][k, p, r] += 1

    results = {'strict': {}, 'lenient': {}}
    for code, stack in per_country.items():
        for k, matrix in enumerate(stack, start=1):
            _record(results, f'{code}{k}', matrix)
        # Country micro: the six narrative matrices of the country summed up.
        _record(results, f'{code}_micro', stack.sum(axis=0))
    _record(results, 'micro', overall)
    return results