buster / embed_documents.py
jerpint's picture
Update prompts (#3)
51727c4 unverified
raw
history blame
694 Bytes
import openai
import pandas as pd
from buster.documents import DeepLakeDocumentsManager
from utils import zip_contents
def read_csv(filename: str) -> pd.DataFrame:
    """Load a pre-chunked CSV file and verify it has the expected columns.

    Args:
        filename: Path of the CSV file to load with ``pd.read_csv``.

    Returns:
        The loaded DataFrame, unmodified.

    Raises:
        AssertionError: If any of the columns "url", "source", "title" or
            "content" is absent. The message lists every missing column.
    """
    required_columns = ("url", "source", "title", "content")
    df = pd.read_csv(filename)

    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        # Raised explicitly instead of using an `assert` statement so the
        # check still runs under `python -O`. AssertionError is kept as the
        # exception type so existing callers see the same failure class.
        raise AssertionError(
            f"{filename} is missing required column(s): {', '.join(missing)}"
        )
    return df
if __name__ == "__main__":
    # Script entry point: load the chunked CSV, hand it to a
    # DeepLakeDocumentsManager, then zip the result and report the path.

    # Load and validate the pre-chunked documents first, so a bad CSV fails
    # before the documents manager is constructed.
    documents = read_csv("data/outputs.csv")

    # overwrite=True is forwarded to the manager as-is; presumably it replaces
    # any existing store at this path -- confirm against buster's docs.
    manager = DeepLakeDocumentsManager("deeplake_store", overwrite=True)
    manager.add(documents)

    # to_zip() returns the value reported below as the zip location.
    zipped_file_path = manager.to_zip()
    print(f"Contents zipped to: {zipped_file_path}")