Spaces:
Sleeping
Sleeping
File size: 33,983 Bytes
8d121da |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"import requests \n",
"import datetime as dt\n",
"import re\n",
"import json\n",
"from tqdm import tqdm\n",
"import os\n",
"import time\n",
"from openai import OpenAI"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Calculate"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" with open('/home/sagemaker-user/Sciences-POC/config/secrets/keys.txt', 'r') as f:\n",
" keys = json.loads(f.read())\n",
"else : \n",
" keys=os.environ"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"save_path = '/home/sagemaker-user/Sciences-POC/data/outputs'\n",
"content_path = '/home/sagemaker-user/Sciences-POC/data/extract_sciences_po'\n",
"\n",
"\n",
"def retrieve_classifications(name, mapping_prompt):\n",
"\n",
" df = pd.read_csv('/home/sagemaker-user/Sciences-POC/data/extract_sciences_po.csv')\n",
"\n",
"\n",
" if os.path.exists(f\"{save_path}/output_{name}.txt\"):\n",
" with open(f\"{save_path}/output_{name}.txt\", 'r') as f : \n",
" out_dict = json.loads(f.read())\n",
" out_df = pd.DataFrame.from_dict(out_dict)\n",
" out = out_dict\n",
" else : \n",
" out_df = pd.DataFrame(columns = ['item_id', 'categorie_principale', 'categorie_secondaire'])\n",
" out = []\n",
"\n",
" df_to_process = df.loc[~df.item_id.isin(out_df.item_id)]\n",
"\n",
" if mapping_prompt[name]['client']=='deepseek':\n",
" client = OpenAI(api_key=keys[\"DEEPSEEK_API_KEY\"], base_url=\"https://api.deepseek.com\")\n",
" model=\"deepseek-chat\"\n",
" else:\n",
" client=OpenAI(api_key=keys['OPENAI_API_KEY'])\n",
" model=\"gpt-4o\"\n",
"\n",
" df_to_process = df.loc[~df.item_id.isin(out_df.item_id)]\n",
"\n",
"\n",
" with open(mapping_prompt[name]['path_prompt'], 'r') as f:\n",
" prompt = f.read()\n",
"\n",
" with tqdm(total=df_to_process.shape[0]) as pbar:\n",
" for i, row in df_to_process.iterrows():\n",
" titre_brut = f\"{row.item_id}_\"+row.titre.lower().strip().replace(f\"\\xa0\", ' ').replace(' : ', ':').replace(' ', '_').replace('/', '')\n",
" \n",
" with open(f'{content_path}/{titre_brut}.txt', 'r') as f:\n",
" text = f.read()\n",
"\n",
" messages = [{\"role\": \"system\", \"content\": prompt},\n",
" {\"role\": \"user\", \"content\": text}]\n",
"\n",
" response = client.chat.completions.create(\n",
" model=model,\n",
" messages=messages,\n",
" response_format={\n",
" 'type': 'json_object'\n",
" }\n",
" )\n",
" try : \n",
" cat_json = json.loads(response.choices[0].message.content)\n",
"\n",
" out.append({\n",
" 'item_id':row.item_id, \n",
" 'categorie_principale': cat_json['categorie_principale'],\n",
" 'categorie_secondaire': cat_json['categorie_secondaire'],\n",
" })\n",
" \n",
" with open(f'{save_path}/output_{name}.txt', 'w+') as f : \n",
" f.write(json.dumps(out))\n",
"\n",
" except Exception as e : \n",
" print(f'Error with article {row.item_id}')\n",
" pass\n",
"\n",
" \n",
" pbar.update(1)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dimanov_et_al\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/442 [00:02<?, ?it/s]\n"
]
},
{
"ename": "RateLimitError",
"evalue": "Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mRateLimitError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[4]\u001b[39m\u001b[32m, line 6\u001b[39m\n\u001b[32m 4\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m mapping.keys():\n\u001b[32m 5\u001b[39m \u001b[38;5;28mprint\u001b[39m(name)\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m \u001b[43mretrieve_classifications\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapping\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 44\u001b[39m, in \u001b[36mretrieve_classifications\u001b[39m\u001b[34m(name, mapping_prompt)\u001b[39m\n\u001b[32m 39\u001b[39m text = f.read()\n\u001b[32m 41\u001b[39m messages = [{\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33msystem\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mcontent\u001b[39m\u001b[33m\"\u001b[39m: prompt},\n\u001b[32m 42\u001b[39m {\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33muser\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mcontent\u001b[39m\u001b[33m\"\u001b[39m: text}]\n\u001b[32m---> \u001b[39m\u001b[32m44\u001b[39m response = \u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mchat\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcompletions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 45\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 46\u001b[39m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 47\u001b[39m \u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 48\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mtype\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mjson_object\u001b[39;49m\u001b[33;43m'\u001b[39;49m\n\u001b[32m 49\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\n\u001b[32m 50\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 51\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m : \n\u001b[32m 52\u001b[39m cat_json = json.loads(response.choices[\u001b[32m0\u001b[39m].message.content)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/_utils/_utils.py:279\u001b[39m, in \u001b[36mrequired_args.<locals>.inner.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 277\u001b[39m msg = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[32m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 278\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[32m--> \u001b[39m\u001b[32m279\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/resources/chat/completions/completions.py:914\u001b[39m, in \u001b[36mCompletions.create\u001b[39m\u001b[34m(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, reasoning_effort, response_format, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, web_search_options, extra_headers, extra_query, extra_body, timeout)\u001b[39m\n\u001b[32m 871\u001b[39m \u001b[38;5;129m@required_args\u001b[39m([\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mmodel\u001b[39m\u001b[33m\"\u001b[39m], [\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mmodel\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mstream\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m 872\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcreate\u001b[39m(\n\u001b[32m 873\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 911\u001b[39m timeout: \u001b[38;5;28mfloat\u001b[39m | httpx.Timeout | \u001b[38;5;28;01mNone\u001b[39;00m | NotGiven = NOT_GIVEN,\n\u001b[32m 912\u001b[39m ) -> ChatCompletion | Stream[ChatCompletionChunk]:\n\u001b[32m 913\u001b[39m validate_response_format(response_format)\n\u001b[32m--> \u001b[39m\u001b[32m914\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 915\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m/chat/completions\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 916\u001b[39m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 917\u001b[39m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 918\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 919\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmodel\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 920\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43maudio\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43maudio\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 921\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfrequency_penalty\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency_penalty\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 922\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfunction_call\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunction_call\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 923\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfunctions\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunctions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 924\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlogit_bias\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogit_bias\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 925\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlogprobs\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 926\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmax_completion_tokens\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_completion_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 927\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmax_tokens\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 928\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmetadata\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 929\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmodalities\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodalities\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 930\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mn\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 931\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mparallel_tool_calls\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mparallel_tool_calls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 932\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mprediction\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mprediction\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 933\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mpresence_penalty\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mpresence_penalty\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 934\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreasoning_effort\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mreasoning_effort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 935\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mresponse_format\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 936\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mseed\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 937\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mservice_tier\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mservice_tier\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 938\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstop\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 939\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstore\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 940\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstream\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 941\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstream_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 942\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtemperature\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemperature\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 943\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtool_choice\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 944\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtools\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 945\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtop_logprobs\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_logprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 946\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtop_p\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_p\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 947\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43muser\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 948\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mweb_search_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mweb_search_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 949\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 950\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompletion_create_params\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCompletionCreateParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 951\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 952\u001b[39m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\n\u001b[32m 954\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 955\u001b[39m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m=\u001b[49m\u001b[43mChatCompletion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 956\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 957\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mChatCompletionChunk\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 958\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1242\u001b[39m, in \u001b[36mSyncAPIClient.post\u001b[39m\u001b[34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[39m\n\u001b[32m 1228\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mpost\u001b[39m(\n\u001b[32m 1229\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 1230\u001b[39m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 1237\u001b[39m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 1238\u001b[39m ) -> ResponseT | _StreamT:\n\u001b[32m 1239\u001b[39m opts = FinalRequestOptions.construct(\n\u001b[32m 1240\u001b[39m method=\u001b[33m\"\u001b[39m\u001b[33mpost\u001b[39m\u001b[33m\"\u001b[39m, url=path, json_data=body, files=to_httpx_files(files), **options\n\u001b[32m 1241\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1242\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/_base_client.py:919\u001b[39m, in \u001b[36mSyncAPIClient.request\u001b[39m\u001b[34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[39m\n\u001b[32m 916\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 917\u001b[39m retries_taken = \u001b[32m0\u001b[39m\n\u001b[32m--> \u001b[39m\u001b[32m919\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 920\u001b[39m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 921\u001b[39m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 922\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 923\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 924\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 925\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1008\u001b[39m, in \u001b[36mSyncAPIClient._request\u001b[39m\u001b[34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[39m\n\u001b[32m 1006\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries > \u001b[32m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m._should_retry(err.response):\n\u001b[32m 1007\u001b[39m err.response.close()\n\u001b[32m-> \u001b[39m\u001b[32m1008\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1009\u001b[39m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1010\u001b[39m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1011\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1012\u001b[39m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[43m=\u001b[49m\u001b[43merr\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1013\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1014\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1015\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1017\u001b[39m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[32m 1018\u001b[39m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[32m 1019\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err.response.is_closed:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1057\u001b[39m, in \u001b[36mSyncAPIClient._retry_request\u001b[39m\u001b[34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[39m\n\u001b[32m 1053\u001b[39m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[32m 1054\u001b[39m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[32m 1055\u001b[39m time.sleep(timeout)\n\u001b[32m-> \u001b[39m\u001b[32m1057\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1058\u001b[39m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1059\u001b[39m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1060\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 1061\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1062\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1063\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1008\u001b[39m, in \u001b[36mSyncAPIClient._request\u001b[39m\u001b[34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[39m\n\u001b[32m 1006\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries > \u001b[32m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m._should_retry(err.response):\n\u001b[32m 1007\u001b[39m err.response.close()\n\u001b[32m-> \u001b[39m\u001b[32m1008\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1009\u001b[39m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1010\u001b[39m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1011\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1012\u001b[39m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[43m=\u001b[49m\u001b[43merr\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1013\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1014\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1015\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1017\u001b[39m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[32m 1018\u001b[39m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[32m 1019\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err.response.is_closed:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1057\u001b[39m, in \u001b[36mSyncAPIClient._retry_request\u001b[39m\u001b[34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[39m\n\u001b[32m 1053\u001b[39m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[32m 1054\u001b[39m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[32m 1055\u001b[39m time.sleep(timeout)\n\u001b[32m-> \u001b[39m\u001b[32m1057\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1058\u001b[39m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1059\u001b[39m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1060\u001b[39m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 1061\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1062\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1063\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.cache/pypoetry/virtualenvs/sciences-poc-Bw010oVJ-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1023\u001b[39m, in \u001b[36mSyncAPIClient._request\u001b[39m\u001b[34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[39m\n\u001b[32m 1020\u001b[39m err.response.read()\n\u001b[32m 1022\u001b[39m log.debug(\u001b[33m\"\u001b[39m\u001b[33mRe-raising status error\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1023\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m._make_status_error_from_response(err.response) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1025\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._process_response(\n\u001b[32m 1026\u001b[39m cast_to=cast_to,\n\u001b[32m 1027\u001b[39m options=options,\n\u001b[32m (...)\u001b[39m\u001b[32m 1031\u001b[39m retries_taken=retries_taken,\n\u001b[32m 1032\u001b[39m )\n",
"\u001b[31mRateLimitError\u001b[39m: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}"
]
}
],
"source": [
"with open('/home/sagemaker-user/Sciences-POC/config/mapping_prompts.txt', 'r') as f : \n",
" mapping = json.loads(f.read())\n",
"\n",
"for name in mapping.keys():\n",
" print(name)\n",
" retrieve_classifications(name, mapping)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Ajouter images"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|