{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9f7a25f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from supabase.client import Client, create_client\n",
    "from langchain_huggingface import HuggingFaceEmbeddings\n",
    "from langchain.schema import Document\n",
    "\n",
    "# Read a local .env file into os.environ; the next cell looks up\n",
    "# SUPABASE_URL and SUPABASE_SERVICE_KEY there.\n",
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c948d46",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Supabase client built from the two environment variables below.\n",
    "supabase: Client = create_client(\n",
    "    os.environ.get(\"SUPABASE_URL\"),\n",
    "    os.environ.get(\"SUPABASE_SERVICE_KEY\"),\n",
    ")\n",
    "\n",
    "# Embedding model used to turn each document's text into a vector.\n",
    "embeddings = HuggingFaceEmbeddings(model_name=\"Alibaba-NLP/gte-modernbert-base\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2c5492b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# metadata.jsonl holds one JSON object per line; the fields read here are\n",
    "# 'Question', 'Final answer' and 'task_id'.\n",
    "# Open explicitly as UTF-8 so decoding does not depend on the platform's\n",
    "# default encoding.\n",
    "with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:\n",
    "    # Skip whitespace-only lines (e.g. a trailing blank line), which\n",
    "    # json.loads would reject.\n",
    "    records = [json.loads(line) for line in jsonl_file if line.strip()]\n",
    "\n",
    "# One row per record: the text, its origin (task_id) and its embedding.\n",
    "documents = []\n",
    "for record in records:\n",
    "    content = f\"Question : {record['Question']}\\n\\nFinal answer : {record['Final answer']}\"\n",
    "    documents.append({\n",
    "        \"content\": content,\n",
    "        \"metadata\": {\n",
    "            \"source\": record['task_id'],\n",
    "        },\n",
    "        \"embedding\": embeddings.embed_query(content),\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26ddbafd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prerequisites on the Supabase side:\n",
    "#   - pgvector must be enabled so the database can act as a vector store;\n",
    "#   - the \"gaia_documents\" table must already exist, with matching column types.\n",
    "try:\n",
    "    response = (\n",
    "        supabase.table(\"gaia_documents\")\n",
    "        .insert(documents)\n",
    "        .execute()\n",
    "    )\n",
    "except Exception as exception:\n",
    "    # Report the failure instead of raising, as before.\n",
    "    print(\"Error inserting data into Supabase:\", exception)\n",
    "else:\n",
    "    # Make a successful run visible too, so an empty output is never ambiguous.\n",
    "    print(f\"Inserted {len(documents)} documents into Supabase.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}