broadfield-dev committed on
Commit
90e461b
·
verified ·
1 Parent(s): e92f37c

Create save_to_hf.py

Browse files
Files changed (1) hide show
  1. save_to_hf.py +26 -0
save_to_hf.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # save_to_hf.py
2
+ from datasets import Dataset
3
+ import chromadb
4
+ from database import init_chromadb, create_collection
5
+
6
def save_chromadb_to_hf(dataset_name="python_program_vectors", token=None):
    """Push the contents of the ChromaDB collection to the Hugging Face Hub.

    Reads the documents, metadata and embeddings from the collection
    returned by ``create_collection(init_chromadb())``, reshapes them into
    the columns ``code``, ``sequence`` and ``vectors``, and uploads the
    result as a dataset.

    Args:
        dataset_name: Repository name to push the dataset to.
        token: Hugging Face access token. When omitted, the ``HF_TOKEN``
            environment variable is used; if that is unset too, ``None`` is
            passed through so the library can fall back to the credentials
            stored by a previous login.

    Raises:
        KeyError: If a metadata entry has no ``"sequence"`` key.
    """
    # Local import keeps this function self-contained; the file's top-level
    # imports do not include ``os``.
    import os

    client = init_chromadb()
    collection = create_collection(client)

    # Fetch all data from ChromaDB
    results = collection.get(include=["documents", "metadatas", "embeddings"])
    data = {
        "code": results["documents"],
        "sequence": [meta["sequence"] for meta in results["metadatas"]],
        "vectors": results["embeddings"],
    }

    # Create a Hugging Face Dataset
    dataset = Dataset.from_dict(data)

    # Never hard-code credentials: the previous placeholder string was sent
    # verbatim as the token, which cannot authenticate and encourages
    # committing a real secret into the source file.
    hf_token = token or os.environ.get("HF_TOKEN")

    # Push to Hugging Face Hub
    dataset.push_to_hub(dataset_name, token=hf_token)
    print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
24
+
25
if __name__ == "__main__":
    # Executed as a script: export the collection under the default
    # dataset name.
    save_chromadb_to_hf()