In [None]:
import os
import json
from dotenv import load_dotenv
from supabase.client import Client, create_client
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document

# Load settings from a local .env file into the process environment before any
# cell reads os.environ. NOTE(review): presumably this is where SUPABASE_URL and
# SUPABASE_SERVICE_KEY come from; confirm the .env file defines both.
load_dotenv()

 from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Connection settings are read from the environment; either lookup yields None
# when the variable is not set.
supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(supabase_url, supabase_key)

# Embedding model used to turn each document's text into a vector.
embeddings = HuggingFaceEmbeddings(
    model_name="Alibaba-NLP/gte-modernbert-base",
)

In [15]:
# Read the JSONL file: one JSON object per line.
# - The encoding is pinned to UTF-8 so decoding does not depend on the
#   platform's default locale.
# - Whitespace-only lines (e.g. a trailing blank line at end of file) are
#   dropped, because json.loads() raises JSONDecodeError on them.
with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:
    json_list = [line for line in jsonl_file if line.strip()]

# Build one row per record. Each row holds the text that was embedded, the
# record's task_id as its metadata "source", and the value returned by
# embeddings.embed_query() for that text.
documents = []
for json_str in json_list:
    json_data = json.loads(json_str)
    content = f"Question : {json_data['Question']}\n\nFinal answer : {json_data['Final answer']}"
    embedding = embeddings.embed_query(content)
    document = {
        "content": content,
        "metadata": {
            "source": json_data['task_id'],
        },
        "embedding": embedding,
    }
    documents.append(document)

In [None]:
# Prerequisites on the Supabase side: the pgvector extension must be enabled so
# the database can store vectors, and the target table must already exist with
# matching column types.
try:
    insert_request = supabase.table("gaia_documents").insert(documents)
    response = insert_request.execute()
except Exception as insert_error:
    # Report the failure instead of raising, so the notebook keeps running.
    print("Error inserting data into Supabase:", insert_error)