Spaces:
Running
Running
import openai | |
import pandas as pd | |
from buster.documents import DeepLakeDocumentsManager | |
from utils import zip_contents | |
def read_csv(filename: str) -> pd.DataFrame:
    """Load a pre-chunked CSV file and check that it has the expected columns.

    Args:
        filename: Path to the CSV file to read.

    Returns:
        The file contents as a DataFrame, unmodified.

    Raises:
        ValueError: If any of the columns "url", "source", "title" or
            "content" is absent from the file.
    """
    df = pd.read_csv(filename)

    # Validate explicitly rather than with `assert`: assertions are stripped
    # when Python runs with -O, and a bare assert does not say which column
    # is missing.
    required_columns = ["url", "source", "title", "content"]
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise ValueError(
            f"{filename} is missing required column(s): {', '.join(missing)}"
        )
    return df
if __name__ == "__main__":
    # Script entry point: load the chunk CSV, add it to a documents manager,
    # then zip the result and report where the archive was written.
    store_dir = "deeplake_store"
    chunks_csv = "data/outputs.csv"
    replace_existing = True

    chunks = read_csv(chunks_csv)

    # NOTE(review): `overwrite=True` presumably replaces any existing store at
    # `store_dir` -- confirm against the DeepLakeDocumentsManager docs.
    manager = DeepLakeDocumentsManager(store_dir, overwrite=replace_existing)
    manager.add(chunks)

    # The value returned by to_zip() is reported as the archive location.
    archive_path = manager.to_zip()
    print(f"Contents zipped to: {archive_path}")