| import pandas as pd | |
| from sentence_transformers.util import cos_sim | |
| from utils.models import SBert | |
def p0_originality(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
    """Score each response's originality as ``1 - cos_sim(prompt, response)``.

    An ``SBert`` object is built from ``model_name`` and called on every
    prompt and response string; ``cos_sim`` of the two results is reduced to
    a Python scalar with ``.item()``.

    Args:
        df: Frame holding ``prompt`` and ``response`` columns. It is modified
            in place: an ``originality`` column is added (or overwritten).
        model_name: Name handed to ``SBert``.

    Returns:
        The same ``df`` object, now carrying the ``originality`` column.

    Raises:
        AssertionError: If the ``prompt`` or ``response`` column is missing.
    """
    # NOTE: ``assert`` is kept so existing callers still see AssertionError;
    # be aware these checks disappear when Python runs with -O.
    assert 'prompt' in df.columns, "input frame needs a 'prompt' column"
    assert 'response' in df.columns, "input frame needs a 'response' column"

    model = SBert(model_name)

    # The same prompt usually appears on many rows, so embed it only once.
    # NOTE(review): assumes model(text) is deterministic and that prompts are
    # hashable (strings) -- confirm against SBert.
    prompt_vecs = {}
    scores = []

    # Plain iteration instead of df.apply(axis=1): no per-row Series is built,
    # and an empty frame simply yields an empty column instead of relying on
    # apply's empty-frame inference (which probes the function with NaNs).
    for prompt, response in zip(df['prompt'], df['response']):
        if prompt not in prompt_vecs:
            prompt_vecs[prompt] = model(prompt)
        similarity = cos_sim(prompt_vecs[prompt], model(response)).item()
        scores.append(1 - similarity)

    # Reuse the frame's own index so the assignment is purely positional.
    df['originality'] = pd.Series(scores, index=df.index, dtype='float64')
    return df
def p1_flexibility(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
    """Average originality per ``(id, prompt)`` pair and call it flexibility.

    Originality is first computed for every row via ``p0_originality`` (which
    adds an ``originality`` column to ``df`` in place); the rows are then
    grouped by ``id`` and ``prompt`` and the mean originality of each group
    is reported.

    Args:
        df: Frame with ``id``, ``prompt`` and ``response`` columns.
        model_name: Name forwarded to ``p0_originality``.

    Returns:
        A new frame with one row per ``(id, prompt)`` group and the columns
        ``id``, ``prompt`` and ``flexibility``, on a fresh default index.

    Raises:
        AssertionError: If ``id`` (or a column required by
            ``p0_originality``) is missing.
    """
    # Validate before the embedding pass so a missing column fails fast
    # instead of after every row has already been scored.
    assert 'id' in df.columns, "input frame needs an 'id' column"

    scored = p0_originality(df, model_name)

    # as_index=False keeps the grouping keys as ordinary columns, so there is
    # no need to re-aggregate them with 'first' and then drop the index.
    return scored.groupby(by=['id', 'prompt'], as_index=False).agg(
        flexibility=('originality', 'mean'),
    )
if __name__ == '__main__':
    # Manual smoke run: score the bundled example file with one model.
    _model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
    _df_input = pd.read_csv('data/example_3.csv')

    # Per-row originality; the returned frame is the input frame itself.
    _df_0 = p0_originality(_df_input, _model_name)

    # Per-(id, prompt) flexibility, computed from the same input frame.
    _df_1 = p1_flexibility(_df_input, _model_name)