Adarsh-aot committed on
Commit
94447f8
·
verified ·
1 Parent(s): f0c50f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -6
app.py CHANGED
@@ -5,18 +5,64 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
  from transformers import pipeline
6
  from langchain_community.llms import HuggingFacePipeline
7
 
8
- # Initialize ChromaDB client
9
- chroma_client = chromadb.PersistentClient(path="data_db")
10
 
11
- # Define the embedding function
12
- sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")
13
 
14
- # Get or create a collection
15
- collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)
16
 
17
  # Streamlit UI elements
18
  st.title("ChromaDB and HuggingFace Pipeline Integration")
19
  query = st.text_input("Enter your query:", value="director")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  if st.button("Search"):
22
  # Query the collection
 
5
  from transformers import pipeline
6
  from langchain_community.llms import HuggingFacePipeline
7
 
8
+ # # Initialize ChromaDB client
9
+ # chroma_client = chromadb.PersistentClient(path="data_db")
10
 
11
+ # # Define the embedding function
12
+ # sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")
13
 
14
+ # # Get or create a collection
15
+ # collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)
16
 
17
  # Streamlit UI elements
18
  st.title("ChromaDB and HuggingFace Pipeline Integration")
19
  query = st.text_input("Enter your query:", value="director")
20
+ import csv
21
+ import chromadb
22
+ from chromadb.utils import embedding_functions
23
+
24
+
25
+
26
+ with open('./output.csv' , encoding="utf-8") as file:
27
+ lines = csv.reader(file)
28
+
29
+
30
+ documents = []
31
+
32
+
33
+ metadatas = []
34
+
35
+
36
+ ids = []
37
+ id = 1
38
+
39
+
40
+ for i, line in enumerate(lines):
41
+ if i == 0:
42
+
43
+ continue
44
+
45
+ documents.append(line[0])
46
+ metadatas.append({"item_id": line[1]})
47
+ ids.append(str(id))
48
+ id += 1
49
+
50
+
51
+ chroma_client = chromadb.PersistentClient(path="db")
52
+
53
+
54
+ sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")
55
+
56
+
57
+ collection = chroma_client.get_or_create_collection(name="my_collection", embedding_function=sentence_transformer_ef)
58
+
59
+
60
+ collection.add(
61
+ documents=documents,
62
+ metadatas=metadatas,
63
+ ids=ids
64
+ )
65
+
66
 
67
  if st.button("Search"):
68
  # Query the collection