Vela committed
Commit · c038c12
Parent(s): a8dda00

modified chroma db

This commit comments out the entire Chroma vector-store module and drops its import from llm_model.py, leaving pinecone_db as the active retrieval backend.

Files changed:
- src/backend/data/chroma_db.py (+63 -63)
- src/backend/models/llm_model.py (+1 -1)
src/backend/data/chroma_db.py
CHANGED
@@ -1,73 +1,73 @@
-import chromadb
-import asyncio
-from utils import logger
-from chromadb.utils import embedding_functions
-default_ef = embedding_functions.DefaultEmbeddingFunction()
+# import chromadb
+# import asyncio
+# from utils import logger
+# from chromadb.utils import embedding_functions
+# default_ef = embedding_functions.DefaultEmbeddingFunction()
 
-logger = logger.get_logger()
+# logger = logger.get_logger()
 
-# Constants
-COLLECTION_NAME = "care_companion_ai_vectors"
-DB_PATH = "./src/backend/vector-db"
+# # Constants
+# COLLECTION_NAME = "care_companion_ai_vectors"
+# DB_PATH = "./src/backend/vector-db"
 
-# Initialize ChromaDB Client
-client = chromadb.PersistentClient(path=DB_PATH)
+# # Initialize ChromaDB Client
+# client = chromadb.PersistentClient(path=DB_PATH)
 
 
 
-# chroma_client = chromadb.HttpClient(host='localhost', port=8000)
-# client = chromadb.AsyncHttpClient()
+# # chroma_client = chromadb.HttpClient(host='localhost', port=8000)
+# # client = chromadb.AsyncHttpClient()
 
-collection = client.get_or_create_collection(
-    name=COLLECTION_NAME,
-    embedding_function=default_ef,
-    metadata={
-        "description": "yuvabe care companion ai chroma collection",
-        "hnsw:space": "cosine",
-        "hnsw:search_ef": 100
-    })
+# collection = client.get_or_create_collection(
+#     name=COLLECTION_NAME,
+#     embedding_function=default_ef,
+#     metadata={
+#         "description": "yuvabe care companion ai chroma collection",
+#         "hnsw:space": "cosine",
+#         "hnsw:search_ef": 100
+#     })
 
-def add_data_to_vector_store(df):
-    try:
-        logger.info("Started upserting the data to database")
-        for index, row in df.iterrows():
-            input_text = row['input']
-            output_text = row['output']
-            instruction_text = row['instruction']
-            if not isinstance(input_text, str) or not input_text.strip():
-                logger.warning(f"Skipping row {index} due to empty or invalid input text.")
-                continue
-            row_dict = {
-                "question": input_text,
-                "answer" : output_text,
-                "instruction": instruction_text
-            }
-            collection.upsert(
-                documents=input_text,
-                metadatas=row_dict,
-                ids=f"id{index}"
-            )
-            logger.info(f"Successfully upserted {index} records.")
-        logger.info("Successfully upserted all the records.")
-    except Exception as e:
-        logger.exception(f"Unable to upsert the data to the database: {e}")
+# def add_data_to_vector_store(df):
+#     try:
+#         logger.info("Started upserting the data to database")
+#         for index, row in df.iterrows():
+#             input_text = row['input']
+#             output_text = row['output']
+#             instruction_text = row['instruction']
+#             if not isinstance(input_text, str) or not input_text.strip():
+#                 logger.warning(f"Skipping row {index} due to empty or invalid input text.")
+#                 continue
+#             row_dict = {
+#                 "question": input_text,
+#                 "answer" : output_text,
+#                 "instruction": instruction_text
+#             }
+#             collection.upsert(
+#                 documents=input_text,
+#                 metadatas=row_dict,
+#                 ids=f"id{index}"
+#             )
+#             logger.info(f"Successfully upserted {index} records.")
+#         logger.info("Successfully upserted all the records.")
+#     except Exception as e:
+#         logger.exception(f"Unable to upsert the data to the database: {e}")
 
-def search_vector_store(query, n_result : int = 3):
-    try:
-        logger.info("Trying to fetch the data from database")
-        response = collection.query(
-            query_texts=[query],
-            n_results=n_result,
-            include=["metadatas","distances","documents"]
-        )
-        logger.info("Successfully fetched the data from database")
-        return response
-    except Exception as e:
-        logger.exception("Failed to fetch the data from database")
+# def search_vector_store(query, n_result : int = 3):
+#     try:
+#         logger.info("Trying to fetch the data from database")
+#         response = collection.query(
+#             query_texts=[query],
+#             n_results=n_result,
+#             include=["metadatas","distances","documents"]
+#         )
+#         logger.info("Successfully fetched the data from database")
+#         return response
+#     except Exception as e:
+#         logger.exception("Failed to fetch the data from database")
 
-def get_retrieved_context(prompt: str) -> str:
-    response = search_vector_store(prompt)
-    if response and "metadatas" in response and response["metadatas"]:
-        retrieved_contexts = [metadata["answer"] for metadata in response["metadatas"][0]]
-        return "\n".join(retrieved_contexts[:3])
-    return "No relevant information found in the database."
+# def get_retrieved_context(prompt: str) -> str:
+#     response = search_vector_store(prompt)
+#     if response and "metadatas" in response and response["metadatas"]:
+#         retrieved_contexts = [metadata["answer"] for metadata in response["metadatas"][0]]
+#         return "\n".join(retrieved_contexts[:3])
+#     return "No relevant information found in the database."
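For reference, below is a minimal, self-contained sketch of the upsert-and-query pattern the now-disabled module implemented, using chromadb's PersistentClient and default embedding function. The collection name, metadata, and query shape mirror the commented-out code; demo_rows and its sample strings are illustrative stand-ins, not data from the repository. Unlike the original loop, the sketch batches ids, documents, and metadatas into a single upsert call, which avoids one round trip per DataFrame row.

# Sketch of the disabled Chroma layer: one batched upsert, then a
# cosine-similarity query, then joining the stored answers the way
# get_retrieved_context did. demo_rows is illustrative sample data.
import chromadb
from chromadb.utils import embedding_functions

client = chromadb.PersistentClient(path="./src/backend/vector-db")
collection = client.get_or_create_collection(
    name="care_companion_ai_vectors",
    embedding_function=embedding_functions.DefaultEmbeddingFunction(),
    metadata={"hnsw:space": "cosine", "hnsw:search_ef": 100},
)

demo_rows = [
    {"input": "What is palliative care?",
     "output": "Care focused on comfort and quality of life.",
     "instruction": "Answer briefly."},
]

# Batch all rows into one upsert instead of calling upsert per row.
collection.upsert(
    ids=[f"id{i}" for i in range(len(demo_rows))],
    documents=[r["input"] for r in demo_rows],
    metadatas=[{"question": r["input"], "answer": r["output"],
                "instruction": r["instruction"]} for r in demo_rows],
)

response = collection.query(
    query_texts=["palliative care"],
    n_results=3,
    include=["metadatas", "distances", "documents"],
)
# Join the stored answers for the best matches into one context string.
context = "\n".join(m["answer"] for m in response["metadatas"][0])
print(context)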
src/backend/models/llm_model.py
CHANGED
@@ -1,7 +1,7 @@
 import os
 from groq import Groq
 from utils import logger
-from data import chroma_db
+# from data import chroma_db
 from data import pinecone_db
 from dotenv import load_dotenv
 
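With the import commented out, pinecone_db is the only retrieval backend wired into llm_model.py. If both modules expose the same interface, the swap could also be driven by configuration rather than by editing imports. The sketch below is hypothetical: it assumes pinecone_db provides a get_retrieved_context(prompt) matching the Chroma version shown above (its interface is not visible in this diff), and the VECTOR_BACKEND environment variable is an invented name.

# Hypothetical sketch: choose the retrieval backend from an environment
# variable instead of commenting imports in and out. Assumes both
# modules expose get_retrieved_context(prompt: str) -> str, as the
# Chroma version above does; pinecone_db's interface is not shown here.
import os

if os.getenv("VECTOR_BACKEND", "pinecone") == "chroma":
    from data import chroma_db as vector_db
else:
    from data import pinecone_db as vector_db

def build_prompt(user_prompt: str) -> str:
    # Pull supporting context from whichever backend is active and
    # prepend it to the user's question before calling the LLM.
    context = vector_db.get_retrieved_context(user_prompt)
    return f"Context:\n{context}\n\nQuestion: {user_prompt}"

Keeping both backends behind one alias means future swaps are a one-line environment change rather than another commit that comments code in and out.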