Spaces:
Sleeping
Sleeping
Delete experiment.ipynb
Browse files- experiment.ipynb +0 -201
experiment.ipynb
DELETED
@@ -1,201 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": null,
|
6 |
-
"id": "initial_id",
|
7 |
-
"metadata": {
|
8 |
-
"collapsed": true,
|
9 |
-
"is_executing": true,
|
10 |
-
"ExecuteTime": {
|
11 |
-
"start_time": "2024-05-31T11:06:03.089830Z"
|
12 |
-
}
|
13 |
-
},
|
14 |
-
"outputs": [
|
15 |
-
{
|
16 |
-
"name": "stdout",
|
17 |
-
"output_type": "stream",
|
18 |
-
"text": [
|
19 |
-
"Processing 100 entries with 1 runs each.\n"
|
20 |
-
]
|
21 |
-
},
|
22 |
-
{
|
23 |
-
"name": "stderr",
|
24 |
-
"output_type": "stream",
|
25 |
-
"text": [
|
26 |
-
"Processing runs: 0%| | 0/1 [00:00<?, ?run/s]\n",
|
27 |
-
"Processing entries: 0%| | 0/100 [00:00<?, ?entry/s]\u001B[A\n",
|
28 |
-
"Processing entries: 1%| | 1/100 [00:47<1:17:58, 47.26s/entry]\u001B[A\n",
|
29 |
-
"Processing entries: 2%|β | 2/100 [01:15<58:51, 36.04s/entry] \u001B[A\n",
|
30 |
-
"Processing entries: 3%|β | 3/100 [01:49<56:30, 34.95s/entry]\u001B[A\n",
|
31 |
-
"Processing entries: 4%|β | 4/100 [02:21<54:34, 34.11s/entry]\u001B[A\n",
|
32 |
-
"Processing entries: 5%|β | 5/100 [02:59<56:11, 35.49s/entry]\u001B[A\n",
|
33 |
-
"Processing entries: 6%|β | 6/100 [03:35<55:33, 35.46s/entry]\u001B[A\n",
|
34 |
-
"Processing entries: 7%|β | 7/100 [04:12<55:48, 36.00s/entry]\u001B[A\n",
|
35 |
-
"Processing entries: 8%|β | 8/100 [04:52<57:20, 37.40s/entry]\u001B[A\n",
|
36 |
-
"Processing entries: 9%|β | 9/100 [05:19<51:31, 33.97s/entry]\u001B[A\n",
|
37 |
-
"Processing entries: 10%|β | 10/100 [15:46<5:25:34, 217.06s/entry]\u001B[A\n",
|
38 |
-
"Processing entries: 11%|β | 11/100 [16:11<3:55:07, 158.51s/entry]\u001B[A\n",
|
39 |
-
"Processing entries: 12%|ββ | 12/100 [17:15<3:09:53, 129.48s/entry]\u001B[A\n",
|
40 |
-
"Processing entries: 13%|ββ | 13/100 [17:54<2:28:14, 102.23s/entry]\u001B[A\n",
|
41 |
-
"Processing entries: 14%|ββ | 14/100 [18:32<1:58:40, 82.79s/entry] \u001B[A\n",
|
42 |
-
"Processing entries: 15%|ββ | 15/100 [19:00<1:34:01, 66.37s/entry]\u001B[A\n",
|
43 |
-
"Processing entries: 16%|ββ | 16/100 [19:45<1:23:45, 59.83s/entry]\u001B[A\n",
|
44 |
-
"Processing entries: 17%|ββ | 17/100 [20:27<1:15:23, 54.50s/entry]\u001B[A\n",
|
45 |
-
"Processing entries: 18%|ββ | 18/100 [20:55<1:03:40, 46.59s/entry]\u001B[A\n",
|
46 |
-
"Processing entries: 19%|ββ | 19/100 [21:27<56:41, 42.00s/entry] \u001B[A\n",
|
47 |
-
"Processing entries: 20%|ββ | 20/100 [22:12<57:27, 43.09s/entry]\u001B[A\n",
|
48 |
-
"Processing entries: 21%|ββ | 21/100 [22:41<51:05, 38.81s/entry]\u001B[A\n",
|
49 |
-
"Processing entries: 22%|βββ | 22/100 [23:06<45:15, 34.81s/entry]\u001B[A\n",
|
50 |
-
"Processing entries: 23%|βββ | 23/100 [23:50<48:06, 37.49s/entry]\u001B[A\n",
|
51 |
-
"Processing entries: 24%|βββ | 24/100 [24:21<44:54, 35.46s/entry]\u001B[A\n",
|
52 |
-
"Processing entries: 25%|βββ | 25/100 [25:00<45:50, 36.68s/entry]\u001B[A\n",
|
53 |
-
"Processing entries: 26%|βββ | 26/100 [25:35<44:32, 36.12s/entry]\u001B[A\n",
|
54 |
-
"Processing entries: 27%|βββ | 27/100 [26:04<41:14, 33.89s/entry]\u001B[A\n",
|
55 |
-
"Processing entries: 28%|βββ | 28/100 [26:29<37:26, 31.19s/entry]\u001B[A\n",
|
56 |
-
"Processing entries: 29%|βββ | 29/100 [27:04<38:24, 32.46s/entry]\u001B[A\n",
|
57 |
-
"Processing entries: 30%|βββ | 30/100 [27:38<38:15, 32.79s/entry]\u001B[A\n",
|
58 |
-
"Processing entries: 31%|βββ | 31/100 [28:20<40:55, 35.59s/entry]\u001B[A\n",
|
59 |
-
"Processing entries: 32%|ββββ | 32/100 [29:08<44:23, 39.18s/entry]\u001B[A\n",
|
60 |
-
"Processing entries: 33%|ββββ | 33/100 [29:37<40:20, 36.13s/entry]\u001B[A\n",
|
61 |
-
"Processing entries: 34%|ββββ | 34/100 [30:23<43:02, 39.13s/entry]\u001B[A\n",
|
62 |
-
"Processing entries: 35%|ββββ | 35/100 [31:19<47:58, 44.28s/entry]\u001B[A\n",
|
63 |
-
"Processing entries: 36%|ββββ | 36/100 [32:01<46:39, 43.75s/entry]\u001B[A\n",
|
64 |
-
"Processing entries: 37%|ββββ | 37/100 [32:27<40:11, 38.28s/entry]\u001B[A\n",
|
65 |
-
"Processing entries: 38%|ββββ | 38/100 [32:53<35:51, 34.71s/entry]\u001B[A\n",
|
66 |
-
"Processing entries: 39%|ββββ | 39/100 [33:31<36:05, 35.50s/entry]\u001B[A\n",
|
67 |
-
"Processing entries: 40%|ββββ | 40/100 [34:11<37:01, 37.02s/entry]\u001B[A\n",
|
68 |
-
"Processing entries: 41%|ββββ | 41/100 [34:39<33:41, 34.27s/entry]\u001B[A\n",
|
69 |
-
"Processing entries: 42%|βββββ | 42/100 [35:23<35:54, 37.15s/entry]\u001B[A\n",
|
70 |
-
"Processing entries: 43%|βββββ | 43/100 [35:50<32:32, 34.26s/entry]\u001B[A\n",
|
71 |
-
"Processing entries: 44%|βββββ | 44/100 [36:29<33:08, 35.50s/entry]\u001B[A\n",
|
72 |
-
"Processing entries: 45%|βββββ | 45/100 [37:01<31:34, 34.45s/entry]\u001B[A\n",
|
73 |
-
"Processing entries: 46%|βββββ | 46/100 [37:32<30:13, 33.58s/entry]\u001B[A\n",
|
74 |
-
"Processing entries: 47%|βββββ | 47/100 [38:16<32:12, 36.47s/entry]\u001B[A\n",
|
75 |
-
"Processing entries: 48%|βββββ | 48/100 [39:01<33:47, 38.99s/entry]\u001B[A\n",
|
76 |
-
"Processing entries: 49%|βββββ | 49/100 [39:24<29:07, 34.27s/entry]\u001B[A\n",
|
77 |
-
"Processing entries: 50%|βββββ | 50/100 [40:13<32:16, 38.72s/entry]\u001B[A\n",
|
78 |
-
"Processing entries: 51%|βββββ | 51/100 [40:48<30:50, 37.78s/entry]\u001B[A\n",
|
79 |
-
"Processing entries: 52%|ββββββ | 52/100 [41:28<30:41, 38.37s/entry]\u001B[A\n",
|
80 |
-
"Processing entries: 53%|ββββββ | 53/100 [42:06<29:48, 38.06s/entry]\u001B[A\n",
|
81 |
-
"Processing entries: 54%|ββββββ | 54/100 [42:29<25:54, 33.80s/entry]\u001B[A\n",
|
82 |
-
"Processing entries: 55%|ββββββ | 55/100 [43:06<26:00, 34.68s/entry]\u001B[A\n",
|
83 |
-
"Processing entries: 56%|ββββββ | 56/100 [43:33<23:48, 32.46s/entry]\u001B[A\n",
|
84 |
-
"Processing entries: 57%|ββββββ | 57/100 [44:28<27:57, 39.02s/entry]\u001B[A\n",
|
85 |
-
"Processing entries: 58%|ββββββ | 58/100 [45:05<26:58, 38.53s/entry]\u001B[A\n",
|
86 |
-
"Processing entries: 59%|ββββββ | 59/100 [45:46<26:48, 39.22s/entry]\u001B[A\n",
|
87 |
-
"Processing entries: 60%|ββββββ | 60/100 [46:26<26:18, 39.46s/entry]\u001B[A\n",
|
88 |
-
"Processing entries: 61%|ββββββ | 61/100 [46:57<23:54, 36.77s/entry]\u001B[A\n",
|
89 |
-
"Processing entries: 62%|βββββββ | 62/100 [47:19<20:29, 32.36s/entry]\u001B[A\n",
|
90 |
-
"Processing entries: 63%|βββββββ | 63/100 [48:23<25:58, 42.12s/entry]\u001B[A\n",
|
91 |
-
"Processing entries: 64%|βββββββ | 64/100 [48:56<23:34, 39.29s/entry]\u001B[A\n",
|
92 |
-
"Processing entries: 65%|βββββββ | 65/100 [49:25<21:03, 36.11s/entry]\u001B[A\n",
|
93 |
-
"Processing entries: 66%|βββββββ | 66/100 [49:57<19:47, 34.92s/entry]\u001B[A\n",
|
94 |
-
"Processing entries: 67%|βββββββ | 67/100 [50:20<17:17, 31.44s/entry]\u001B[A\n",
|
95 |
-
"Processing entries: 68%|βββββββ | 68/100 [51:01<18:10, 34.07s/entry]\u001B[A\n",
|
96 |
-
"Processing entries: 69%|βββββββ | 69/100 [52:08<22:46, 44.07s/entry]\u001B[A\n",
|
97 |
-
"Processing entries: 70%|βββββββ | 70/100 [52:39<20:04, 40.14s/entry]\u001B[A\n",
|
98 |
-
"Processing entries: 71%|βββββββ | 71/100 [53:26<20:24, 42.22s/entry]\u001B[A\n",
|
99 |
-
"Processing entries: 72%|ββββββββ | 72/100 [53:53<17:38, 37.80s/entry]\u001B[A\n",
|
100 |
-
"Processing entries: 73%|ββββββββ | 73/100 [54:21<15:36, 34.67s/entry]\u001B[A\n",
|
101 |
-
"Processing entries: 74%|ββββββββ | 74/100 [55:17<17:47, 41.06s/entry]\u001B[A\n",
|
102 |
-
"Processing entries: 75%|ββββββββ | 75/100 [56:20<19:56, 47.84s/entry]\u001B[A\n",
|
103 |
-
"Processing entries: 76%|ββββββββ | 76/100 [58:05<25:57, 64.89s/entry]\u001B[A\n",
|
104 |
-
"Processing entries: 77%|ββββββββ | 77/100 [59:07<24:30, 63.95s/entry]\u001B[A\n",
|
105 |
-
"Processing entries: 78%|ββββββββ | 78/100 [59:53<21:26, 58.48s/entry]\u001B[A\n",
|
106 |
-
"Processing entries: 79%|ββββββββ | 79/100 [1:00:32<18:30, 52.89s/entry]\u001B[A\n",
|
107 |
-
"Processing entries: 80%|ββββββββ | 80/100 [1:01:09<16:01, 48.10s/entry]\u001B[A\n",
|
108 |
-
"Processing entries: 81%|ββββββββ | 81/100 [1:01:38<13:24, 42.37s/entry]\u001B[A\n",
|
109 |
-
"Processing entries: 82%|βββββββββ | 82/100 [1:02:13<12:00, 40.02s/entry]\u001B[A"
|
110 |
-
]
|
111 |
-
}
|
112 |
-
],
|
113 |
-
"source": [
|
114 |
-
"import pandas as pd\n",
|
115 |
-
"from util.injection import process_scores_multiple\n",
|
116 |
-
"from util.model import AzureAgent, GPTAgent, Claude3Agent\n",
|
117 |
-
"from util.prompt import PROMPT_TEMPLATE\n",
|
118 |
-
"\n",
|
119 |
-
"def run_experiment(api_key, model_type, deployment_name, temperature, max_tokens, occupation,\n",
|
120 |
-
" sample_size, group_name, privilege_label, protect_label, num_run, prompt_template, endpoint_url=None):\n",
|
121 |
-
" # Load data\n",
|
122 |
-
" df = pd.read_csv(\"resume_subsampled.csv\")\n",
|
123 |
-
" \n",
|
124 |
-
" # Filter data by occupation\n",
|
125 |
-
" df = df[df[\"Occupation\"] == occupation]\n",
|
126 |
-
" df = df.sample(n=sample_size, random_state=42)\n",
|
127 |
-
" \n",
|
128 |
-
" # Initialize the agent\n",
|
129 |
-
" if model_type == 'AzureAgent':\n",
|
130 |
-
" agent = AzureAgent(api_key, endpoint_url, deployment_name)\n",
|
131 |
-
" elif model_type == 'GPTAgent':\n",
|
132 |
-
" api_version = '2024-02-15-preview'\n",
|
133 |
-
" agent = GPTAgent(api_key, endpoint_url, deployment_name, api_version)\n",
|
134 |
-
" else:\n",
|
135 |
-
" agent = Claude3Agent(api_key, deployment_name)\n",
|
136 |
-
" \n",
|
137 |
-
" # Process data\n",
|
138 |
-
" parameters = {\"temperature\": temperature, \"max_tokens\": max_tokens}\n",
|
139 |
-
" preprocessed_df = process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation, prompt_template)\n",
|
140 |
-
" \n",
|
141 |
-
" return preprocessed_df\n",
|
142 |
-
"\n",
|
143 |
-
"# Set experiment parameters\n",
|
144 |
-
"api_key = \"6c75a8235f204c9e8cf6228e485982f7\"\n",
|
145 |
-
"model_type = \"GPTAgent\" # or \"AzureAgent\" or \"Claude3Agent\"\n",
|
146 |
-
"deployment_name = \"gpt4-1106\"\n",
|
147 |
-
"temperature = 0.0\n",
|
148 |
-
"max_tokens = 300\n",
|
149 |
-
"file_path = \"resume_subsampled.csv\" # or path to your file\n",
|
150 |
-
"occupation = \"FINANCE\"\n",
|
151 |
-
"sample_size = 100\n",
|
152 |
-
"group_name = \"Gender\"\n",
|
153 |
-
"privilege_label = \"Male\"\n",
|
154 |
-
"protect_label = \"Female\"\n",
|
155 |
-
"num_run = 1\n",
|
156 |
-
"prompt_template = PROMPT_TEMPLATE\n",
|
157 |
-
"endpoint_url = \"https://safeguard-monitor.openai.azure.com/\"\n",
|
158 |
-
"\n",
|
159 |
-
"# Run experiment\n",
|
160 |
-
"results = run_experiment(api_key, model_type, deployment_name, temperature, max_tokens, occupation,\n",
|
161 |
-
" sample_size, group_name, privilege_label, protect_label, num_run, prompt_template, endpoint_url)\n",
|
162 |
-
"\n",
|
163 |
-
"# Display results\n",
|
164 |
-
"results.head()\n",
|
165 |
-
"\n",
|
166 |
-
"# Optionally save results to a CSV file\n",
|
167 |
-
"results.to_csv(f'result/{occupation}_results.csv', index=False)\n"
|
168 |
-
]
|
169 |
-
},
|
170 |
-
{
|
171 |
-
"cell_type": "code",
|
172 |
-
"outputs": [],
|
173 |
-
"source": [],
|
174 |
-
"metadata": {
|
175 |
-
"collapsed": false
|
176 |
-
},
|
177 |
-
"id": "43711da68c012a83"
|
178 |
-
}
|
179 |
-
],
|
180 |
-
"metadata": {
|
181 |
-
"kernelspec": {
|
182 |
-
"display_name": "Python 3",
|
183 |
-
"language": "python",
|
184 |
-
"name": "python3"
|
185 |
-
},
|
186 |
-
"language_info": {
|
187 |
-
"codemirror_mode": {
|
188 |
-
"name": "ipython",
|
189 |
-
"version": 2
|
190 |
-
},
|
191 |
-
"file_extension": ".py",
|
192 |
-
"mimetype": "text/x-python",
|
193 |
-
"name": "python",
|
194 |
-
"nbconvert_exporter": "python",
|
195 |
-
"pygments_lexer": "ipython2",
|
196 |
-
"version": "2.7.6"
|
197 |
-
}
|
198 |
-
},
|
199 |
-
"nbformat": 4,
|
200 |
-
"nbformat_minor": 5
|
201 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|