''' load dataset: https://huggingface.co/docs/datasets/loading#hugging-face-hub '''
from datasets import load_dataset, load_from_disk
from huggingface_hub import hf_hub_download
import faiss

# load the Wikipedia dataset from the Hugging Face Hub
datasetx = load_dataset("JosueElias/pipeline_dataset2")

# download the FAISS index file and get its local path
path2 = hf_hub_download(repo_id="JosueElias/pipeline_faiss", filename="faiss.index", repo_type="dataset")

# save the Wikipedia dataset locally
datasetx.save_to_disk("./directory")

# delete the variable to free memory
del datasetx

# reload the dataset from disk in Arrow format
datasetx = load_from_disk("./directory/train")

# attach the FAISS index to the dataset's 'embeddings' column
datasetx.load_faiss_index('embeddings', path2)
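
# Optional sanity check (a minimal sketch, not part of the original pipeline):
# query the attached index with a random vector of the right dimensionality.
# In a real search the query would come from the same embedding model that
# produced the 'embeddings' column; the random vector below is only a
# placeholder to confirm the index responds.
import numpy as np

# recover the dimensionality of the indexed vectors from the loaded index
dim = datasetx.get_index('embeddings').faiss_index.d
query = np.random.rand(dim).astype("float32")  # placeholder query vector

# retrieve the 5 nearest neighbors and their similarity scores
scores, examples = datasetx.get_nearest_examples('embeddings', query, k=5)
print(scores)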