Vela committed
Commit c038c12 · 1 Parent(s): a8dda00

modified chroma db

src/backend/data/chroma_db.py CHANGED
@@ -1,73 +1,73 @@
- import chromadb
- import asyncio
- from utils import logger
- from chromadb.utils import embedding_functions
- default_ef = embedding_functions.DefaultEmbeddingFunction()

- logger = logger.get_logger()

- # Constants
- COLLECTION_NAME = "care_companion_ai_vectors"
- DB_PATH = "./src/backend/vector-db"

- # Initialize ChromaDB Client
- client = chromadb.PersistentClient(path=DB_PATH)



- # chroma_client = chromadb.HttpClient(host='localhost', port=8000)
- # client = chromadb.AsyncHttpClient()

- collection = client.get_or_create_collection(
-     name=COLLECTION_NAME,
-     embedding_function=default_ef,
-     metadata={
-         "description": "yuvabe care companion ai chroma collection",
-         "hnsw:space": "cosine",
-         "hnsw:search_ef": 100
-     })

- def add_data_to_vector_store(df):
-     try:
-         logger.info("Started upserting the data to database")
-         for index, row in df.iterrows():
-             input_text = row['input']
-             output_text = row['output']
-             instruction_text = row['instruction']
-             if not isinstance(input_text, str) or not input_text.strip():
-                 logger.warning(f"Skipping row {index} due to empty or invalid input text.")
-                 continue
-             row_dict = {
-                 "question": input_text,
-                 "answer" : output_text,
-                 "instruction": instruction_text
-             }
-             collection.upsert(
-                 documents=input_text,
-                 metadatas=row_dict,
-                 ids=f"id{index}"
-             )
-             logger.info(f"Successfully upserted {index} records.")
-         logger.info("Successfully upserted all the records.")
-     except Exception as e:
-         logger.exception(f"Unable to upsert the data to the database: {e}")

- def search_vector_store(query, n_result : int = 3):
-     try:
-         logger.info("Trying to fetch the data from database")
-         response = collection.query(
-             query_texts=[query],
-             n_results=n_result,
-             include=["metadatas","distances","documents"]
-         )
-         logger.info("Successfully fetched the data from database")
-         return response
-     except Exception as e:
-         logger.exception("Failed to fetch the data from database")

- def get_retrieved_context(prompt: str) -> str:
-     response = search_vector_store(prompt)
-     if response and "metadatas" in response and response["metadatas"]:
-         retrieved_contexts = [metadata["answer"] for metadata in response["metadatas"][0]]
-         return "\n".join(retrieved_contexts[:3])
-     return "No relevant information found in the database."


+ # import chromadb
+ # import asyncio
+ # from utils import logger
+ # from chromadb.utils import embedding_functions
+ # default_ef = embedding_functions.DefaultEmbeddingFunction()

+ # logger = logger.get_logger()

+ # # Constants
+ # COLLECTION_NAME = "care_companion_ai_vectors"
+ # DB_PATH = "./src/backend/vector-db"

+ # # Initialize ChromaDB Client
+ # client = chromadb.PersistentClient(path=DB_PATH)



+ # # chroma_client = chromadb.HttpClient(host='localhost', port=8000)
+ # # client = chromadb.AsyncHttpClient()

+ # collection = client.get_or_create_collection(
+ #     name=COLLECTION_NAME,
+ #     embedding_function=default_ef,
+ #     metadata={
+ #         "description": "yuvabe care companion ai chroma collection",
+ #         "hnsw:space": "cosine",
+ #         "hnsw:search_ef": 100
+ #     })

+ # def add_data_to_vector_store(df):
+ #     try:
+ #         logger.info("Started upserting the data to database")
+ #         for index, row in df.iterrows():
+ #             input_text = row['input']
+ #             output_text = row['output']
+ #             instruction_text = row['instruction']
+ #             if not isinstance(input_text, str) or not input_text.strip():
+ #                 logger.warning(f"Skipping row {index} due to empty or invalid input text.")
+ #                 continue
+ #             row_dict = {
+ #                 "question": input_text,
+ #                 "answer" : output_text,
+ #                 "instruction": instruction_text
+ #             }
+ #             collection.upsert(
+ #                 documents=input_text,
+ #                 metadatas=row_dict,
+ #                 ids=f"id{index}"
+ #             )
+ #             logger.info(f"Successfully upserted {index} records.")
+ #         logger.info("Successfully upserted all the records.")
+ #     except Exception as e:
+ #         logger.exception(f"Unable to upsert the data to the database: {e}")

+ # def search_vector_store(query, n_result : int = 3):
+ #     try:
+ #         logger.info("Trying to fetch the data from database")
+ #         response = collection.query(
+ #             query_texts=[query],
+ #             n_results=n_result,
+ #             include=["metadatas","distances","documents"]
+ #         )
+ #         logger.info("Successfully fetched the data from database")
+ #         return response
+ #     except Exception as e:
+ #         logger.exception("Failed to fetch the data from database")

+ # def get_retrieved_context(prompt: str) -> str:
+ #     response = search_vector_store(prompt)
+ #     if response and "metadatas" in response and response["metadatas"]:
+ #         retrieved_contexts = [metadata["answer"] for metadata in response["metadatas"][0]]
+ #         return "\n".join(retrieved_contexts[:3])
+ #     return "No relevant information found in the database."
src/backend/models/llm_model.py CHANGED
@@ -1,7 +1,7 @@
  import os
  from groq import Groq
  from utils import logger
- from data import chroma_db
  from data import pinecone_db
  from dotenv import load_dotenv


  import os
  from groq import Groq
  from utils import logger
+ # from data import chroma_db
  from data import pinecone_db
  from dotenv import load_dotenv
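
For reference, a minimal usage sketch of the helpers that chroma_db.py defined before this commit commented them out. It assumes the module was still imported as "from data import chroma_db" (the import removed from llm_model.py above); the pandas DataFrame row and the query string are hypothetical, shaped to match the columns the upsert loop reads.

import pandas as pd
from data import chroma_db  # pre-commit module; this commit comments out its contents

# One sample row with the columns add_data_to_vector_store expects.
df = pd.DataFrame({
    "instruction": ["Answer the patient's question briefly."],
    "input": ["What helps with mild dehydration?"],
    "output": ["Sip water or an oral rehydration solution through the day."],
})

chroma_db.add_data_to_vector_store(df)  # upserts each row: document=input, metadata={question, answer, instruction}
context = chroma_db.get_retrieved_context("dehydration remedies")  # newline-joined top-3 stored answers
print(context)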