wu981526092 committed on
Commit
91e8e74
·
verified ·
1 Parent(s): b25bb07

Delete experiment.ipynb

Browse files
Files changed (1) hide show
  1. experiment.ipynb +0 -201
experiment.ipynb DELETED
@@ -1,201 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "id": "initial_id",
7
- "metadata": {
8
- "collapsed": true,
9
- "is_executing": true,
10
- "ExecuteTime": {
11
- "start_time": "2024-05-31T11:06:03.089830Z"
12
- }
13
- },
14
- "outputs": [
15
- {
16
- "name": "stdout",
17
- "output_type": "stream",
18
- "text": [
19
- "Processing 100 entries with 1 runs each.\n"
20
- ]
21
- },
22
- {
23
- "name": "stderr",
24
- "output_type": "stream",
25
- "text": [
26
- "Processing runs: 0%| | 0/1 [00:00<?, ?run/s]\n",
27
- "Processing entries: 0%| | 0/100 [00:00<?, ?entry/s]\u001B[A\n",
28
- "Processing entries: 1%| | 1/100 [00:47<1:17:58, 47.26s/entry]\u001B[A\n",
29
- "Processing entries: 2%|▏ | 2/100 [01:15<58:51, 36.04s/entry] \u001B[A\n",
30
- "Processing entries: 3%|▎ | 3/100 [01:49<56:30, 34.95s/entry]\u001B[A\n",
31
- "Processing entries: 4%|▍ | 4/100 [02:21<54:34, 34.11s/entry]\u001B[A\n",
32
- "Processing entries: 5%|▌ | 5/100 [02:59<56:11, 35.49s/entry]\u001B[A\n",
33
- "Processing entries: 6%|▌ | 6/100 [03:35<55:33, 35.46s/entry]\u001B[A\n",
34
- "Processing entries: 7%|▋ | 7/100 [04:12<55:48, 36.00s/entry]\u001B[A\n",
35
- "Processing entries: 8%|▊ | 8/100 [04:52<57:20, 37.40s/entry]\u001B[A\n",
36
- "Processing entries: 9%|▉ | 9/100 [05:19<51:31, 33.97s/entry]\u001B[A\n",
37
- "Processing entries: 10%|█ | 10/100 [15:46<5:25:34, 217.06s/entry]\u001B[A\n",
38
- "Processing entries: 11%|█ | 11/100 [16:11<3:55:07, 158.51s/entry]\u001B[A\n",
39
- "Processing entries: 12%|█▏ | 12/100 [17:15<3:09:53, 129.48s/entry]\u001B[A\n",
40
- "Processing entries: 13%|█▎ | 13/100 [17:54<2:28:14, 102.23s/entry]\u001B[A\n",
41
- "Processing entries: 14%|█▍ | 14/100 [18:32<1:58:40, 82.79s/entry] \u001B[A\n",
42
- "Processing entries: 15%|█▌ | 15/100 [19:00<1:34:01, 66.37s/entry]\u001B[A\n",
43
- "Processing entries: 16%|█▌ | 16/100 [19:45<1:23:45, 59.83s/entry]\u001B[A\n",
44
- "Processing entries: 17%|█▋ | 17/100 [20:27<1:15:23, 54.50s/entry]\u001B[A\n",
45
- "Processing entries: 18%|█▊ | 18/100 [20:55<1:03:40, 46.59s/entry]\u001B[A\n",
46
- "Processing entries: 19%|█▉ | 19/100 [21:27<56:41, 42.00s/entry] \u001B[A\n",
47
- "Processing entries: 20%|██ | 20/100 [22:12<57:27, 43.09s/entry]\u001B[A\n",
48
- "Processing entries: 21%|██ | 21/100 [22:41<51:05, 38.81s/entry]\u001B[A\n",
49
- "Processing entries: 22%|██▏ | 22/100 [23:06<45:15, 34.81s/entry]\u001B[A\n",
50
- "Processing entries: 23%|██▎ | 23/100 [23:50<48:06, 37.49s/entry]\u001B[A\n",
51
- "Processing entries: 24%|██▍ | 24/100 [24:21<44:54, 35.46s/entry]\u001B[A\n",
52
- "Processing entries: 25%|██▌ | 25/100 [25:00<45:50, 36.68s/entry]\u001B[A\n",
53
- "Processing entries: 26%|██▌ | 26/100 [25:35<44:32, 36.12s/entry]\u001B[A\n",
54
- "Processing entries: 27%|██▋ | 27/100 [26:04<41:14, 33.89s/entry]\u001B[A\n",
55
- "Processing entries: 28%|██▊ | 28/100 [26:29<37:26, 31.19s/entry]\u001B[A\n",
56
- "Processing entries: 29%|██▉ | 29/100 [27:04<38:24, 32.46s/entry]\u001B[A\n",
57
- "Processing entries: 30%|███ | 30/100 [27:38<38:15, 32.79s/entry]\u001B[A\n",
58
- "Processing entries: 31%|███ | 31/100 [28:20<40:55, 35.59s/entry]\u001B[A\n",
59
- "Processing entries: 32%|███▏ | 32/100 [29:08<44:23, 39.18s/entry]\u001B[A\n",
60
- "Processing entries: 33%|███▎ | 33/100 [29:37<40:20, 36.13s/entry]\u001B[A\n",
61
- "Processing entries: 34%|███▍ | 34/100 [30:23<43:02, 39.13s/entry]\u001B[A\n",
62
- "Processing entries: 35%|███▌ | 35/100 [31:19<47:58, 44.28s/entry]\u001B[A\n",
63
- "Processing entries: 36%|███▌ | 36/100 [32:01<46:39, 43.75s/entry]\u001B[A\n",
64
- "Processing entries: 37%|███▋ | 37/100 [32:27<40:11, 38.28s/entry]\u001B[A\n",
65
- "Processing entries: 38%|███▊ | 38/100 [32:53<35:51, 34.71s/entry]\u001B[A\n",
66
- "Processing entries: 39%|███▉ | 39/100 [33:31<36:05, 35.50s/entry]\u001B[A\n",
67
- "Processing entries: 40%|████ | 40/100 [34:11<37:01, 37.02s/entry]\u001B[A\n",
68
- "Processing entries: 41%|████ | 41/100 [34:39<33:41, 34.27s/entry]\u001B[A\n",
69
- "Processing entries: 42%|████▏ | 42/100 [35:23<35:54, 37.15s/entry]\u001B[A\n",
70
- "Processing entries: 43%|████▎ | 43/100 [35:50<32:32, 34.26s/entry]\u001B[A\n",
71
- "Processing entries: 44%|████▍ | 44/100 [36:29<33:08, 35.50s/entry]\u001B[A\n",
72
- "Processing entries: 45%|████▌ | 45/100 [37:01<31:34, 34.45s/entry]\u001B[A\n",
73
- "Processing entries: 46%|████▌ | 46/100 [37:32<30:13, 33.58s/entry]\u001B[A\n",
74
- "Processing entries: 47%|████▋ | 47/100 [38:16<32:12, 36.47s/entry]\u001B[A\n",
75
- "Processing entries: 48%|████▊ | 48/100 [39:01<33:47, 38.99s/entry]\u001B[A\n",
76
- "Processing entries: 49%|████▉ | 49/100 [39:24<29:07, 34.27s/entry]\u001B[A\n",
77
- "Processing entries: 50%|█████ | 50/100 [40:13<32:16, 38.72s/entry]\u001B[A\n",
78
- "Processing entries: 51%|█████ | 51/100 [40:48<30:50, 37.78s/entry]\u001B[A\n",
79
- "Processing entries: 52%|█████▏ | 52/100 [41:28<30:41, 38.37s/entry]\u001B[A\n",
80
- "Processing entries: 53%|█████▎ | 53/100 [42:06<29:48, 38.06s/entry]\u001B[A\n",
81
- "Processing entries: 54%|█████▍ | 54/100 [42:29<25:54, 33.80s/entry]\u001B[A\n",
82
- "Processing entries: 55%|█████▌ | 55/100 [43:06<26:00, 34.68s/entry]\u001B[A\n",
83
- "Processing entries: 56%|█████▌ | 56/100 [43:33<23:48, 32.46s/entry]\u001B[A\n",
84
- "Processing entries: 57%|█████▋ | 57/100 [44:28<27:57, 39.02s/entry]\u001B[A\n",
85
- "Processing entries: 58%|█████▊ | 58/100 [45:05<26:58, 38.53s/entry]\u001B[A\n",
86
- "Processing entries: 59%|█████▉ | 59/100 [45:46<26:48, 39.22s/entry]\u001B[A\n",
87
- "Processing entries: 60%|██████ | 60/100 [46:26<26:18, 39.46s/entry]\u001B[A\n",
88
- "Processing entries: 61%|██████ | 61/100 [46:57<23:54, 36.77s/entry]\u001B[A\n",
89
- "Processing entries: 62%|██████▏ | 62/100 [47:19<20:29, 32.36s/entry]\u001B[A\n",
90
- "Processing entries: 63%|██████▎ | 63/100 [48:23<25:58, 42.12s/entry]\u001B[A\n",
91
- "Processing entries: 64%|██████▍ | 64/100 [48:56<23:34, 39.29s/entry]\u001B[A\n",
92
- "Processing entries: 65%|██████▌ | 65/100 [49:25<21:03, 36.11s/entry]\u001B[A\n",
93
- "Processing entries: 66%|██████▌ | 66/100 [49:57<19:47, 34.92s/entry]\u001B[A\n",
94
- "Processing entries: 67%|██████▋ | 67/100 [50:20<17:17, 31.44s/entry]\u001B[A\n",
95
- "Processing entries: 68%|██████▊ | 68/100 [51:01<18:10, 34.07s/entry]\u001B[A\n",
96
- "Processing entries: 69%|██████▉ | 69/100 [52:08<22:46, 44.07s/entry]\u001B[A\n",
97
- "Processing entries: 70%|███████ | 70/100 [52:39<20:04, 40.14s/entry]\u001B[A\n",
98
- "Processing entries: 71%|███████ | 71/100 [53:26<20:24, 42.22s/entry]\u001B[A\n",
99
- "Processing entries: 72%|███████▏ | 72/100 [53:53<17:38, 37.80s/entry]\u001B[A\n",
100
- "Processing entries: 73%|███████▎ | 73/100 [54:21<15:36, 34.67s/entry]\u001B[A\n",
101
- "Processing entries: 74%|███████▍ | 74/100 [55:17<17:47, 41.06s/entry]\u001B[A\n",
102
- "Processing entries: 75%|███████▌ | 75/100 [56:20<19:56, 47.84s/entry]\u001B[A\n",
103
- "Processing entries: 76%|███████▌ | 76/100 [58:05<25:57, 64.89s/entry]\u001B[A\n",
104
- "Processing entries: 77%|███████▋ | 77/100 [59:07<24:30, 63.95s/entry]\u001B[A\n",
105
- "Processing entries: 78%|███████▊ | 78/100 [59:53<21:26, 58.48s/entry]\u001B[A\n",
106
- "Processing entries: 79%|███████▉ | 79/100 [1:00:32<18:30, 52.89s/entry]\u001B[A\n",
107
- "Processing entries: 80%|████████ | 80/100 [1:01:09<16:01, 48.10s/entry]\u001B[A\n",
108
- "Processing entries: 81%|████████ | 81/100 [1:01:38<13:24, 42.37s/entry]\u001B[A\n",
109
- "Processing entries: 82%|████████▏ | 82/100 [1:02:13<12:00, 40.02s/entry]\u001B[A"
110
- ]
111
- }
112
- ],
113
- "source": [
114
- "import pandas as pd\n",
115
- "from util.injection import process_scores_multiple\n",
116
- "from util.model import AzureAgent, GPTAgent, Claude3Agent\n",
117
- "from util.prompt import PROMPT_TEMPLATE\n",
118
- "\n",
119
- "def run_experiment(api_key, model_type, deployment_name, temperature, max_tokens, occupation,\n",
120
- " sample_size, group_name, privilege_label, protect_label, num_run, prompt_template, endpoint_url=None):\n",
121
- " # Load data\n",
122
- " df = pd.read_csv(\"resume_subsampled.csv\")\n",
123
- " \n",
124
- " # Filter data by occupation\n",
125
- " df = df[df[\"Occupation\"] == occupation]\n",
126
- " df = df.sample(n=sample_size, random_state=42)\n",
127
- " \n",
128
- " # Initialize the agent\n",
129
- " if model_type == 'AzureAgent':\n",
130
- " agent = AzureAgent(api_key, endpoint_url, deployment_name)\n",
131
- " elif model_type == 'GPTAgent':\n",
132
- " api_version = '2024-02-15-preview'\n",
133
- " agent = GPTAgent(api_key, endpoint_url, deployment_name, api_version)\n",
134
- " else:\n",
135
- " agent = Claude3Agent(api_key, deployment_name)\n",
136
- " \n",
137
- " # Process data\n",
138
- " parameters = {\"temperature\": temperature, \"max_tokens\": max_tokens}\n",
139
- " preprocessed_df = process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation, prompt_template)\n",
140
- " \n",
141
- " return preprocessed_df\n",
142
- "\n",
143
- "# Set experiment parameters\n",
144
- "api_key = \"6c75a8235f204c9e8cf6228e485982f7\"\n",
145
- "model_type = \"GPTAgent\" # or \"AzureAgent\" or \"Claude3Agent\"\n",
146
- "deployment_name = \"gpt4-1106\"\n",
147
- "temperature = 0.0\n",
148
- "max_tokens = 300\n",
149
- "file_path = \"resume_subsampled.csv\" # or path to your file\n",
150
- "occupation = \"FINANCE\"\n",
151
- "sample_size = 100\n",
152
- "group_name = \"Gender\"\n",
153
- "privilege_label = \"Male\"\n",
154
- "protect_label = \"Female\"\n",
155
- "num_run = 1\n",
156
- "prompt_template = PROMPT_TEMPLATE\n",
157
- "endpoint_url = \"https://safeguard-monitor.openai.azure.com/\"\n",
158
- "\n",
159
- "# Run experiment\n",
160
- "results = run_experiment(api_key, model_type, deployment_name, temperature, max_tokens, occupation,\n",
161
- " sample_size, group_name, privilege_label, protect_label, num_run, prompt_template, endpoint_url)\n",
162
- "\n",
163
- "# Display results\n",
164
- "results.head()\n",
165
- "\n",
166
- "# Optionally save results to a CSV file\n",
167
- "results.to_csv(f'result/{occupation}_results.csv', index=False)\n"
168
- ]
169
- },
170
- {
171
- "cell_type": "code",
172
- "outputs": [],
173
- "source": [],
174
- "metadata": {
175
- "collapsed": false
176
- },
177
- "id": "43711da68c012a83"
178
- }
179
- ],
180
- "metadata": {
181
- "kernelspec": {
182
- "display_name": "Python 3",
183
- "language": "python",
184
- "name": "python3"
185
- },
186
- "language_info": {
187
- "codemirror_mode": {
188
- "name": "ipython",
189
- "version": 2
190
- },
191
- "file_extension": ".py",
192
- "mimetype": "text/x-python",
193
- "name": "python",
194
- "nbconvert_exporter": "python",
195
- "pygments_lexer": "ipython2",
196
- "version": "2.7.6"
197
- }
198
- },
199
- "nbformat": 4,
200
- "nbformat_minor": 5
201
- }