Spaces:
Sleeping
Sleeping
Zekun Wu
committed on
Commit
·
657095c
1
Parent(s):
5aaaa8b
update
Browse files- util/generation.py +32 -20
util/generation.py
CHANGED
|
@@ -55,34 +55,46 @@ def invoke_retry(prompt,agent,parameters):
|
|
| 55 |
|
| 56 |
raise Exception("Failed to complete the API call after maximum retry attempts.")
|
| 57 |
|
| 58 |
-
def
|
| 59 |
-
"""
|
| 60 |
-
|
| 61 |
-
Accepts test_type to switch between 'multiple' and 'single' processing modes.
|
| 62 |
-
"""
|
| 63 |
-
if test_type == 'multiple':
|
| 64 |
-
categories = ['Privilege', 'Protect', 'Neutral']
|
| 65 |
-
elif test_type == 'single':
|
| 66 |
-
categories = ['Counterfactual', 'Neutral']
|
| 67 |
-
else:
|
| 68 |
-
raise ValueError("test_type must be either 'multiple' or 'single'")
|
| 69 |
|
| 70 |
-
# Initialize scores dictionary
|
| 71 |
-
scores = {category: [[] for _ in range(len(df))] for category in categories}
|
| 72 |
-
|
| 73 |
-
# Processing loop
|
| 74 |
for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
|
| 75 |
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry"):
|
| 76 |
-
for
|
| 77 |
-
prompt_temp = create_summary(row,
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# Assign score lists and calculate average scores
|
| 82 |
-
for category in
|
| 83 |
df[f'{category}_Scores'] = pd.Series([lst for lst in scores[category]])
|
| 84 |
df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(
|
| 85 |
lambda scores: sum(score for score in scores if score is not None) / len(scores) if scores else None
|
| 86 |
)
|
| 87 |
|
| 88 |
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
raise Exception("Failed to complete the API call after maximum retry attempts.")
|
| 57 |
|
| 58 |
+
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation):
    """Score every row of ``df`` under the Privilege, Protect and Neutral labels.

    For each of ``num_run`` passes, every row is turned into three prompts via
    ``create_summary`` (the Neutral prompt is built with ``label=None``) and
    each prompt is sent through ``invoke_retry``. Calls are issued
    sequentially, one at a time; the two tqdm bars only report progress.

    Args:
        df: DataFrame whose rows are handed to ``create_summary``. It is
            modified in place (new columns are added) and also returned.
        num_run: Number of complete passes over ``df``.
        parameters: Forwarded unchanged to ``invoke_retry``.
        privilege_label: Label used for the 'Privilege' prompt.
        protect_label: Label used for the 'Protect' prompt.
        agent: Forwarded unchanged to ``invoke_retry``.
        group_name: Forwarded unchanged to ``create_summary``.
        occupation: Forwarded unchanged to ``create_summary``.

    Returns:
        ``df`` with, for each category, a ``<category>_Scores`` column holding
        the list of per-run results and a ``<category>_Avg_Score`` column
        holding the mean of the non-None results (None when there are none).

    Note:
        Any exception raised by ``invoke_retry`` propagates to the caller.
        Results are presumably numeric scores -- confirm against
        ``invoke_retry``.
    """
    labels_by_category = {
        'Privilege': privilege_label,
        'Protect': protect_label,
        'Neutral': None,
    }
    # One result list per row *position*, so the bookkeeping does not depend
    # on what the DataFrame's index labels happen to be.
    scores = {category: [[] for _ in range(len(df))] for category in labels_by_category}

    def average(values):
        # Average over the results that are actually present; dividing by the
        # full length would silently count every None as a zero.
        valid = [value for value in values if value is not None]
        return sum(valid) / len(valid) if valid else None

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        entries = tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")
        # iterrows() yields index *labels*; enumerate supplies the position
        # needed to address the per-row lists above.
        for position, (_, row) in enumerate(entries):
            for category, label in labels_by_category.items():
                prompt_temp = create_summary(row, group_name, label, occupation)
                result = invoke_retry(prompt_temp, agent, parameters)
                scores[category][position].append(result)

    # Assign score lists and calculate average scores.
    for category, per_row_scores in scores.items():
        # Reuse df's own index so the assignment aligns row-for-row even when
        # df does not carry a default RangeIndex.
        df[f'{category}_Scores'] = pd.Series(per_row_scores, index=df.index)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(average)

    return df
|
| 79 |
+
|
| 80 |
+
def process_scores_single(df, num_run, parameters, counterfactual_label, agent, group_name, occupation):
    """Score every row of ``df`` under the Counterfactual and Neutral labels.

    For each of ``num_run`` passes, every row is turned into two prompts via
    ``create_summary`` (the Neutral prompt is built with ``label=None``) and
    each prompt is sent through ``invoke_retry``. Calls are issued
    sequentially, one at a time; the two tqdm bars only report progress.

    Args:
        df: DataFrame whose rows are handed to ``create_summary``. It is
            modified in place (new columns are added) and also returned.
        num_run: Number of complete passes over ``df``.
        parameters: Forwarded unchanged to ``invoke_retry``.
        counterfactual_label: Label used for the 'Counterfactual' prompt.
        agent: Forwarded unchanged to ``invoke_retry``.
        group_name: Forwarded unchanged to ``create_summary``.
        occupation: Forwarded unchanged to ``create_summary``.

    Returns:
        ``df`` with, for each category, a ``<category>_Scores`` column holding
        the list of per-run results and a ``<category>_Avg_Score`` column
        holding the mean of the non-None results (None when there are none).

    Note:
        Any exception raised by ``invoke_retry`` propagates to the caller.
        Results are presumably numeric scores -- confirm against
        ``invoke_retry``.
    """
    labels_by_category = {
        'Counterfactual': counterfactual_label,
        'Neutral': None,
    }
    # One result list per row *position*, so the bookkeeping does not depend
    # on what the DataFrame's index labels happen to be.
    scores = {category: [[] for _ in range(len(df))] for category in labels_by_category}

    def average(values):
        # Average over the results that are actually present; dividing by the
        # full length would silently count every None as a zero.
        valid = [value for value in values if value is not None]
        return sum(valid) / len(valid) if valid else None

    for _ in tqdm(range(num_run), desc="Processing runs", unit="run"):
        entries = tqdm(df.iterrows(), total=len(df), desc="Processing entries", unit="entry")
        # iterrows() yields index *labels*; enumerate supplies the position
        # needed to address the per-row lists above.
        for position, (_, row) in enumerate(entries):
            for category, label in labels_by_category.items():
                prompt_temp = create_summary(row, group_name, label, occupation)
                result = invoke_retry(prompt_temp, agent, parameters)
                scores[category][position].append(result)

    # Assign score lists and calculate average scores.
    for category, per_row_scores in scores.items():
        # Reuse df's own index so the assignment aligns row-for-row even when
        # df does not carry a default RangeIndex.
        df[f'{category}_Scores'] = pd.Series(per_row_scores, index=df.index)
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(average)

    return df
|