Spaces:

NLP-UNED
/

dipromats2024-task2-leaderboard

Sleeping

App Files Community

anselp committed on Feb 7

Commit

a2032b5

verified ·

1 Parent(s): 7d62703

Delete dipromats_evaluation.py

Browse files

Files changed (1) hide show

dipromats_evaluation.py +0 -298

dipromats_evaluation.py DELETED Viewed

@@ -1,298 +0,0 @@
-import pandas as pd
-import json
-import numpy as np
-import warnings
-# Suprimir SettingWithCopyWarning
-warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)
-gold_json_path='./gold_test.json'
-import pandas as pd
-import json
-import numpy as np
-import warnings
-# Suprimir SettingWithCopyWarning
-warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)
-import json
-# Leer un archivo JSON
-gold_json_path='./gold_test.json'
-def evaluate_results(lang, file_path):
-    def load_gold():
-        df=pd.read_json(gold_json_path)
-        return df
-    def load_to_be_evaluated_set(file_path):
-        with open(file_path, 'r') as file:
-            data=json.load(file)
-            dft=pd.DataFrame(data)
-        return dft
-    def normalize_labels(df):
-        # Define a function that checks if each narrative is present and assigns "yes" or "no"
-        def convert_narratives(row):
-            country_code = row['country'][:2].upper()  # Get the country code ('RU', 'CH', etc.)
-            narratives = row['narratives']  # List of narratives for that row
-            # For each N1 to N6, check if it appears in the list of narratives
-            for i in range(1, 7):
-                narrative_code = f"{country_code}{i}"
-                row[f"N{i}"] = 'yes' if narrative_code in narratives else 'no'
-            return row
-        # Apply the function to each row of the DataFrame
-        data = df.apply(convert_narratives, axis=1)
-        # Drop the original 'narratives' column if no longer needed
-        data.drop(columns=['narratives', 'tweet_id'], inplace=True)
-        return data
-    def get_gold_lists_for_evaluation(gold_list, test_list):
-        gold_strict=[]
-        gold_lenient=[]
-        for i in range(0,6):
-            g=gold_list[i]
-            t=test_list[i]
-            g = 1 if g == 'yes' else 2 if g == 'no' else g
-            t = 1 if t == 'yes' else 2 if t == 'no' else t
-            if g==t:
-                gold_strict.append(g)
-                gold_lenient.append(g)
-            elif g!=t:
-                if g in [2, 1]:
-                    gold_strict.append(g)
-                    gold_lenient.append(g)
-                else:
-                    gold_strict.append(2)
-                    gold_lenient.append(t)
-        return gold_strict, gold_lenient
-    def gen_dic(lang):
-        narratives_list=['CH1', 'CH2', 'CH3', 'CH4', 'CH5', 'CH6', 'CH_micro', 'RU1', 'RU2', 'RU3', 'RU4', 'RU5', 'RU6', 'RU_micro', 'EU1', 'EU2', 'EU3', 'EU4', 'EU5', 'EU6', 'EU_micro', 'US1', 'US2', 'US3', 'US4', 'US5', 'US6', 'US_micro']
-        countries_dic={'China':'CH', 'Russia':'RU', 'EU':'EU', 'USA':'US'}
-        dic = {}
-        dic[lang] = {}
-        for ev in ['strict', 'lenient']:
-            if ev not in dic[lang]:
-                dic[lang][ev] = {}
-            for narr in narratives_list:
-                dic[lang][ev][narr] = {'scores': {'precision': 0., 'recall': 0., 'f1-score': 0.}, 'raw_data': []}
-            for code in countries_dic.values():
-                dic[lang][ev][f'{code}_micro'] = {'scores': {'precision': 0., 'recall': 0., 'f1-score': 0}, 'raw_data': []}
-            dic[lang][ev]['micro'] = {'scores': {'precision': 0., 'recall': 0., 'f1-score': 0}, 'raw_data': []}
-        return dic
-    def convert_labels(values):
-        return np.array([
-            [1 if v == 'yes' else 2 if v == 'no' else 3 for v in row]
-            for row in values
-        ])
-    def convert_floats(dic):
-        for key, value in dic.items():
-            if isinstance(value, np.float64):
-                dic[key] = float(value)
-            elif isinstance(value, dict):  # If the value is another dictionary, apply recursion
-                convert_floats(value)
-            elif isinstance(value, list):  # If the value is a list, convert individual elements
-                dic[key] = [float(v) if isinstance(v, np.float64) else v for v in value]
-    dic=gen_dic(lang)
-    countries_dic={'China':'CH', 'Russia':'RU', 'EU':'EU', 'USA':'US'}
-    cols=[f'N{i}' for i in range(1,7)]
-    df_gold=load_gold()
-    df_gold.drop_duplicates(subset=['id', 'lang'], keep='last', inplace=True)
-    df=df_gold[df_gold['lang']==lang]
-    df.reset_index(inplace=True, drop=True)
-    df_test=load_to_be_evaluated_set(file_path)
-    df_test=normalize_labels(df_test)
-    df_test.drop_duplicates(subset=['id', 'language'], keep='last', inplace=True)
-    df_test.reset_index(inplace=True, drop=True)
-    df_strict=df.copy()
-    df_lenient=df.copy()
-    for i in range(len(df)):
-        lang=df['lang'].iloc[i]
-        id=df['id'].iloc[i]
-        gold_values=df[cols].iloc[i].values
-        dft=df_test[(df_test['language']==lang) & (df_test['id']==id)]
-        test_values=dft[cols].iloc[0].values
-        df_strict.loc[i, cols], df_lenient.loc[i, cols]=get_gold_lists_for_evaluation(gold_values, test_values)
-    countries=['China', 'Russia', 'EU', 'USA']
-    df_lang=df[(df['lang']==lang)]
-    df_test_lang=df_test[(df_test['language']==lang)]
-    df_strict_lang=df_strict[df_strict['lang']==lang]
-    df_lenient_lang=df_lenient[df_lenient['lang']==lang]
-    #F1 per narrative
-    for country in countries:
-        df_dup_t=df[(df['country']==country) & (df['lang']==lang)]
-        df_strict_t=df_strict_lang[df_strict_lang['country']==country]
-        df_lenient_t=df_lenient_lang[df_lenient_lang['country']==country]
-        dft=df_test_lang[(df_test_lang['country']==country)]
-        real_strict=[]
-        real_lenient=[]
-        real=[]
-        pred=[]
-        for i in range(len(df_strict_t)):
-            id=df_strict_t['id'].iloc[i]
-            dft2=dft[dft['id']==id]
-            if len(dft2)!=0:
-                real_strict.append(df_strict_t[cols].iloc[i].values)
-                real_lenient.append(df_lenient_t[cols].iloc[i].values)
-                pred.append(dft2[cols].iloc[0].values)
-                real.append(df_dup_t[df_dup_t['id']==id][cols].iloc[0].values)
-        real_strict=np.array(real_strict)
-        real_lenient=np.array(real_lenient)
-        real = convert_labels(real)
-        pred = convert_labels(pred)
-        for i in range(0, 6):
-            raw_matrix = np.zeros((2, 3), dtype=int)  # 2 filas (pred), 3 columnas (real)
-            pred_options = [1, 2]  # 1 -> 'yes', 2 -> 'no'
-            real_options = [1, 3, 2]  # 1
-            p=pred[:,i]
-            r=real[:,i]
-            for p, r in zip(p, r):
-                pred_index = pred_options.index(p)
-                real_index = real_options.index(r)
-                raw_matrix[pred_index, real_index] += 1
-            tp=raw_matrix[0,0]
-            yl=raw_matrix[0,1]
-            fp=raw_matrix[0,2]
-            fn=raw_matrix[1,0]
-            nl=raw_matrix[1,1]
-            tn=raw_matrix[1,2]
-            dic[lang]['lenient'][f'{countries_dic[country]}{i+1}']['raw_data']=raw_matrix.tolist()
-            precision=(tp+yl)/(tp+yl+fp) if (tp+yl+fp)!=0 else 0
-            recall=(tp+yl)/(tp+fn+yl) if (tp+fn+yl)!=0 else 0
-            dic[lang]['lenient'][f'{countries_dic[country]}{i+1}']['scores']['precision']=precision
-            dic[lang]['lenient'][f'{countries_dic[country]}{i+1}']['scores']['recall']=recall
-            dic[lang]['lenient'][f'{countries_dic[country]}{i+1}']['scores']['f1-score']=(2*precision*recall)/(precision+recall) if (precision+recall)!=0 else 0
-            dic[lang]['strict'][f'{countries_dic[country]}{i+1}']['raw_data']=raw_matrix.tolist()
-            precision=tp/(tp+fp+yl)  if (tp+fp+yl)!=0 else 0
-            recall=tp/(tp+fn) if (tp+fn)!=0 else 0
-            dic[lang]['strict'][f'{countries_dic[country]}{i+1}']['scores']['precision']=precision
-            dic[lang]['strict'][f'{countries_dic[country]}{i+1}']['scores']['recall']=recall
-            dic[lang]['strict'][f'{countries_dic[country]}{i+1}']['scores']['f1-score']=(2*precision*recall)/(precision+recall) if (precision+recall)!=0 else 0
-        #F1 Micro
-        real_strict=[]
-        real_lenient=[]
-        pred=[]
-        not_match=[]
-        real=[]
-        for i in range(len(df_lang)):
-            id=df_lang['id'].iloc[i]
-            dft=df_test_lang[df_test_lang['id']==id][cols]
-            if len(dft)!=0:
-                real_strict.extend(df_strict_lang[cols].iloc[i].values)
-                real_lenient.extend(df_strict_lang[cols].iloc[i].values)
-                pred.extend(df_test_lang[df_test_lang['id']==id][cols].iloc[0].values)
-                real.extend(df_lang[df_lang['id']==id][cols].iloc[0].values)
-            else:
-                not_match.append(id)
-        real = convert_labels([real])[0]
-        pred = convert_labels([pred])[0]
-        raw_matrix=np.zeros((2,3), dtype=int)
-        pred_options = [1, 2]  # 1 -> 'yes', 2 -> 'no'
-        real_options = [1, 3, 2]  # 1
-        raw_matrix = np.zeros((2, 3), dtype=int)
-        for p, r in zip(pred, real):
-            pred_index = pred_options.index(p)
-            real_index = real_options.index(r)
-            raw_matrix[pred_index, real_index] += 1
-        tp=raw_matrix[0,0]
-        yl=raw_matrix[0,1]
-        fp=raw_matrix[0,2]
-        fn=raw_matrix[1,0]
-        nl=raw_matrix[1,1]
-        tn=raw_matrix[1,2]
-        dic[lang]['lenient']['micro']['raw_data']=raw_matrix.tolist()
-        precision=(tp+yl)/(tp+yl+fp) if (tp+yl+fp)!=0 else 0
-        recall=(tp+yl)/(tp+fn+yl) if (tp+fn+yl)!=0 else 0
-        dic[lang]['lenient']['micro']['scores']['precision']=precision
-        dic[lang]['lenient']['micro']['scores']['recall']=recall
-        dic[lang]['lenient']['micro']['scores']['f1-score']=(2*precision*recall)/(precision+recall) if (precision+recall)!=0 else 0
-        dic[lang]['strict']['micro']['raw_data']=raw_matrix.tolist()
-        precision=tp/(tp+fp+yl) if (tp+yl+fp)!=0 else 0
-        recall=tp/(tp+fn) if (tp+fn)!=0 else 0
-        dic[lang]['strict']['micro']['scores']['precision']=precision
-        dic[lang]['strict']['micro']['scores']['recall']=recall
-        dic[lang]['strict']['micro']['scores']['f1-score']=(2*precision*recall)/(precision+recall) if (precision+recall)!=0 else 0
-    #Micro-Countries
-    for country in countries_dic.values():
-        raw_matrix = np.sum([np.array(dic[f'{lang}']['strict'][f'{country}{i}']['raw_data']) for i in range(1, 7)], axis=0)
-        tp=raw_matrix[0,0]
-        yl=raw_matrix[0,1]
-        fp=raw_matrix[0,2]
-        fn=raw_matrix[1,0]
-        nl=raw_matrix[1,1]
-        tn=raw_matrix[1,2]
-        precision=(tp+yl)/(tp+yl+fp) if (tp+yl+fp)!=0 else 0
-        recall=(tp+yl)/(tp+fn+yl) if (tp+fn+yl)!=0 else 0
-        dic[lang]['lenient'][f'{country}_micro']['scores']['precision']=precision
-        dic[lang]['lenient'][f'{country}_micro']['scores']['recall']=recall
-        dic[lang]['lenient'][f'{country}_micro']['scores']['f1-score']=(2*precision*recall)/(precision+recall) if (precision+recall)!=0 else 0
-        dic[lang]['lenient'][f'{country}_micro']['raw_data']=raw_matrix.tolist()
-        precision=tp/(tp+fp+yl) if (tp+yl+fp)!=0 else 0
-        recall=tp/(tp+fn) if (tp+fn)!=0 else 0
-        dic[lang]['strict'][f'{country}_micro']['scores']['precision']=precision
-        dic[lang]['strict'][f'{country}_micro']['scores']['recall']=recall
-        dic[lang]['strict'][f'{country}_micro']['scores']['f1-score']=(2*precision*recall)/(precision+recall) if (precision+recall)!=0 else 0
-        dic[lang]['strict'][f'{country}_micro']['raw_data']=raw_matrix.tolist()
-    convert_floats(dic[lang])
-    return dic[lang]
-"""
-strict
-  narrative_country (e.g. CH1)
-    scores
-      precision
-      recall
-      f1-score
-    raw_data
-  country_micro (e.g. CH_micro)
-    scores
-      precision
-      recall
-      f1-score
-    raw_data
-  micro (global micro)
-    scores
-      precision
-      recall
-      f1-score
-    raw_data
-lenient
-  narrative_country (e.g. CH1)
-    scores
-      precision
-      recall
-      f1-score
-    raw_data
-  country_micro (e.g. CH_micro)
-    scores
-      precision
-      recall
-      f1-score
-    raw_data
-  micro (global micro)
-    scores
-      precision
-      recall
-      f1-score
-    raw_data"""