Spaces:
Sleeping
Sleeping
Vela
committed on
Commit
·
a8dda00
1
Parent(s):
6628e9e
added pinecone for project
Browse files- src/backend/__pycache__/main.cpython-313.pyc +0 -0
- src/backend/data/__pycache__/dataset.cpython-313.pyc +0 -0
- src/backend/data/__pycache__/pinecone_db.cpython-313.pyc +0 -0
- src/backend/data/dataset.py +20 -16
- src/backend/data/pinecone_db.py +41 -48
- src/backend/main.py +2 -2
- src/backend/models/__pycache__/embedding_model.cpython-313.pyc +0 -0
- src/backend/models/__pycache__/llm_model.cpython-313.pyc +0 -0
- src/backend/models/__pycache__/schemas.cpython-313.pyc +0 -0
- src/backend/models/embedding_model.py +1 -2
- src/backend/models/llm_model.py +3 -2
- src/backend/models/schemas.py +5 -1
- src/backend/routes/__pycache__/upsert_data.cpython-313.pyc +0 -0
- src/backend/routes/upsert_data.py +17 -13
- src/frontend/app/__pycache__/common_fuctions.cpython-313.pyc +0 -0
- src/frontend/app/__pycache__/homepage.cpython-313.pyc +0 -0
- src/frontend/app/common_fuctions.py +19 -1
- src/frontend/app/homepage.py +2 -2
- src/frontend/pages/database_response_page.py +3 -4
- src/frontend/pages/dataloader_page.py +43 -0
src/backend/__pycache__/main.cpython-313.pyc
CHANGED
Binary files a/src/backend/__pycache__/main.cpython-313.pyc and b/src/backend/__pycache__/main.cpython-313.pyc differ
|
|
src/backend/data/__pycache__/dataset.cpython-313.pyc
CHANGED
Binary files a/src/backend/data/__pycache__/dataset.cpython-313.pyc and b/src/backend/data/__pycache__/dataset.cpython-313.pyc differ
|
|
src/backend/data/__pycache__/pinecone_db.cpython-313.pyc
CHANGED
Binary files a/src/backend/data/__pycache__/pinecone_db.cpython-313.pyc and b/src/backend/data/__pycache__/pinecone_db.cpython-313.pyc differ
|
|
src/backend/data/dataset.py
CHANGED
@@ -9,29 +9,33 @@ DATASET_PATH = "src/backend/data/dataset.csv"
|
|
9 |
PARAQUET_DATASET_PATH = "hf://datasets/lavita/ChatDoctor-HealthCareMagic-100k/data/train-00000-of-00001-5e7cb295b9cff0bf.parquet"
|
10 |
|
11 |
def get_data_set():
|
12 |
-
|
13 |
try:
|
14 |
if not os.path.exists(DATASET_PATH):
|
15 |
logger.info(f"{DATASET_PATH} not found. Reading from Parquet file.")
|
16 |
df = pd.read_parquet(PARAQUET_DATASET_PATH)
|
17 |
-
df.drop_duplicates(subset=["input", "output"], inplace=True)
|
18 |
-
df.dropna(subset=["input", "output"], inplace=True) # Remove NaNs first
|
19 |
-
|
20 |
-
# This line is to remove the empty column or column with only spaces
|
21 |
-
df = df[(df["input"].str.strip() != "") & (df["output"].str.strip() != "")] # Remove empty strings/spaces
|
22 |
-
|
23 |
-
# This line is to remove punctuation and emojis
|
24 |
-
translator = str.maketrans('', '', string.punctuation)
|
25 |
-
df["input"] = df["input"].str.lower().str.translate(translator)
|
26 |
-
df["output"] = df["output"].str.lower().str.translate(translator)
|
27 |
-
df.to_csv(DATASET_PATH, index=False)
|
28 |
-
logger.info(f"CSV file created and cleaned at: {DATASET_PATH}")
|
29 |
else:
|
30 |
logger.info(f"Loading existing dataset from: {DATASET_PATH}")
|
31 |
-
df = pd.read_csv(DATASET_PATH)
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
return df
|
34 |
|
35 |
except Exception as e:
|
36 |
logger.error(f"Error while loading dataset: {e}", exc_info=True)
|
37 |
-
return None
|
|
|
9 |
PARAQUET_DATASET_PATH = "hf://datasets/lavita/ChatDoctor-HealthCareMagic-100k/data/train-00000-of-00001-5e7cb295b9cff0bf.parquet"
|
10 |
|
11 |
def get_data_set():
|
|
|
12 |
try:
|
13 |
if not os.path.exists(DATASET_PATH):
|
14 |
logger.info(f"{DATASET_PATH} not found. Reading from Parquet file.")
|
15 |
df = pd.read_parquet(PARAQUET_DATASET_PATH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
else:
|
17 |
logger.info(f"Loading existing dataset from: {DATASET_PATH}")
|
18 |
+
df = pd.read_csv(DATASET_PATH).fillna("")
|
19 |
+
|
20 |
+
# Cleaning logic for both Parquet and CSV data
|
21 |
+
df.drop_duplicates(subset=["input", "output"], inplace=True)
|
22 |
+
|
23 |
+
# Remove NaN values or empty strings
|
24 |
+
df = df[df["input"].str.strip().notna() & df["output"].str.strip().notna()]
|
25 |
+
df = df[(df["input"].str.strip() != "") & (df["output"].str.strip() != "")]
|
26 |
+
|
27 |
+
# Clean punctuation and emojis
|
28 |
+
translator = str.maketrans('', '', string.punctuation)
|
29 |
+
df["input"] = df["input"].fillna("").str.lower().str.translate(translator)
|
30 |
+
df["output"] = df["output"].fillna("").str.lower().str.translate(translator)
|
31 |
+
|
32 |
+
# Save only if data is present
|
33 |
+
if not os.path.exists(DATASET_PATH):
|
34 |
+
df.to_csv(DATASET_PATH, index=False)
|
35 |
+
logger.info(f"CSV file created and cleaned at: {DATASET_PATH}")
|
36 |
+
|
37 |
return df
|
38 |
|
39 |
except Exception as e:
|
40 |
logger.error(f"Error while loading dataset: {e}", exc_info=True)
|
41 |
+
return None
|
src/backend/data/pinecone_db.py
CHANGED
@@ -61,37 +61,6 @@ def get_index():
|
|
61 |
|
62 |
index = get_index()
|
63 |
|
64 |
-
def process_and_upsert_data(index, data: pd.DataFrame):
|
65 |
-
|
66 |
-
# Validate if the required columns exist in the row (Series)
|
67 |
-
try:
|
68 |
-
logger.info("Started upserting the data to database")
|
69 |
-
for idx, row in data.iterrows():
|
70 |
-
logger.info(f"Processing row {row['input']}")
|
71 |
-
input_text = row['input']
|
72 |
-
output_text = row['output']
|
73 |
-
instruction_text = row['instruction']
|
74 |
-
if not isinstance(input_text, str) or not input_text.strip():
|
75 |
-
logger.warning(f"Skipping row {idx} due to empty or invalid input text.")
|
76 |
-
continue
|
77 |
-
row_dict = {
|
78 |
-
"question": input_text,
|
79 |
-
"answer" : output_text,
|
80 |
-
"instruction": instruction_text
|
81 |
-
}
|
82 |
-
embeddings = embedding_model.get_text_embedding(row['input'])
|
83 |
-
index.upsert(
|
84 |
-
vectors=[{
|
85 |
-
"id": f"id{idx}",
|
86 |
-
"values": embeddings,
|
87 |
-
"metadata":row_dict
|
88 |
-
}],
|
89 |
-
namespace=NAMESPACE,
|
90 |
-
)
|
91 |
-
logger.info(f"Successfully upserted data for question {input_text} with answer {output_text}")
|
92 |
-
except Exception as e:
|
93 |
-
logger.error(f"Error processing row with index {idx}: {e}")
|
94 |
-
|
95 |
def search_vector_store(query, n_result : int = 3) -> list[dict]:
|
96 |
"""
|
97 |
Searches the vector store for the most relevant matches based on the given query.
|
@@ -138,27 +107,51 @@ def get_retrieved_context(prompt: str) -> str:
|
|
138 |
return "\n".join(retrieved_contexts[:3])
|
139 |
return "No relevant information found in the database."
|
140 |
|
141 |
-
df
|
142 |
-
# process_and_upsert_data(index, data_set)
|
143 |
-
# response = search_vector_store("What is the treatment for diabetes?")
|
144 |
-
# print(response)
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
-
# Upload data to Pinecone in batches
|
151 |
-
BATCH_SIZE =
|
152 |
-
vectors = []
|
153 |
|
154 |
-
for i in tqdm(range(0, len(df), BATCH_SIZE), desc="
|
155 |
batch = df.iloc[i : i + BATCH_SIZE]
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
-
|
163 |
|
164 |
-
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
index = get_index()
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def search_vector_store(query, n_result : int = 3) -> list[dict]:
|
65 |
"""
|
66 |
Searches the vector store for the most relevant matches based on the given query.
|
|
|
107 |
return "\n".join(retrieved_contexts[:3])
|
108 |
return "No relevant information found in the database."
|
109 |
|
110 |
+
def upsert_data_in_db(df: pd.DataFrame):
|
|
|
|
|
|
|
111 |
|
112 |
+
"""
|
113 |
+
Generates embeddings for the given DataFrame and uploads data to Pinecone in batches.
|
114 |
+
|
115 |
+
Parameters:
|
116 |
+
- df (pd.DataFrame): DataFrame containing 'input' and 'output' columns.
|
117 |
+
|
118 |
+
Returns:
|
119 |
+
- None
|
120 |
+
"""
|
121 |
|
122 |
+
try:
|
123 |
+
df["embedding"] = [
|
124 |
+
embedding_model.get_text_embedding([q])[0]
|
125 |
+
for q in tqdm(df["input"], desc="Generating Embeddings")
|
126 |
+
]
|
127 |
+
except Exception as e:
|
128 |
+
logger.error(f"Error generating embeddings: {e}")
|
129 |
+
return
|
130 |
|
131 |
+
# # Upload data to Pinecone in batches
|
132 |
+
BATCH_SIZE = 500
|
|
|
133 |
|
134 |
+
for i in tqdm(range(0, len(df), BATCH_SIZE), desc="Uploading Data to Pinecone"):
|
135 |
batch = df.iloc[i : i + BATCH_SIZE]
|
136 |
+
|
137 |
+
vectors = []
|
138 |
+
for idx, (embedding, (_, row_data)) in enumerate(zip(batch["embedding"], batch.iterrows())):
|
139 |
+
vector_id = f"q_{i + idx}" # Ensures IDs remain unique across batches
|
140 |
+
metadata = {
|
141 |
+
"question": row_data.get("input"),
|
142 |
+
"answer": row_data.get("output")
|
143 |
+
}
|
144 |
+
vectors.append((vector_id, embedding, metadata))
|
145 |
+
|
146 |
+
try:
|
147 |
+
index.upsert(vectors)
|
148 |
+
except Exception as e:
|
149 |
+
logger.error(f"Error uploading batch starting at index {i}: {e}")
|
150 |
|
151 |
+
logger.info("All question-answer pairs stored successfully!")
|
152 |
|
153 |
+
|
154 |
+
# df = dataset.get_data_set()[19000:21000]
|
155 |
+
# upsert_data_in_db(df)
|
156 |
+
# response = search_vector_store("What is the treatment for diabetes?")
|
157 |
+
# print(response)
|
src/backend/main.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from fastapi import FastAPI
|
2 |
-
from routes import chat_api
|
3 |
|
4 |
app = FastAPI()
|
5 |
|
6 |
app.include_router(chat_api.router, prefix="/chat", tags=["chat"])
|
7 |
-
|
|
|
1 |
from fastapi import FastAPI
|
2 |
+
from routes import chat_api,upsert_data
|
3 |
|
4 |
app = FastAPI()
|
5 |
|
6 |
app.include_router(chat_api.router, prefix="/chat", tags=["chat"])
|
7 |
+
app.include_router(upsert_data.router, prefix="/data", tags=["data"])
|
src/backend/models/__pycache__/embedding_model.cpython-313.pyc
CHANGED
Binary files a/src/backend/models/__pycache__/embedding_model.cpython-313.pyc and b/src/backend/models/__pycache__/embedding_model.cpython-313.pyc differ
|
|
src/backend/models/__pycache__/llm_model.cpython-313.pyc
CHANGED
Binary files a/src/backend/models/__pycache__/llm_model.cpython-313.pyc and b/src/backend/models/__pycache__/llm_model.cpython-313.pyc differ
|
|
src/backend/models/__pycache__/schemas.cpython-313.pyc
CHANGED
Binary files a/src/backend/models/__pycache__/schemas.cpython-313.pyc and b/src/backend/models/__pycache__/schemas.cpython-313.pyc differ
|
|
src/backend/models/embedding_model.py
CHANGED
@@ -9,9 +9,8 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
9 |
|
10 |
def get_text_embedding(search_query: str):
|
11 |
try:
|
12 |
-
logger.info(f"Getting embedding for the text: {search_query}")
|
13 |
text_embedding = model.encode(search_query, convert_to_tensor=True).cpu().numpy().tolist()
|
14 |
-
logger.info("Text embedding successfully retrieved.")
|
15 |
return text_embedding
|
16 |
except Exception as e:
|
17 |
logger.error(f"Error while getting embedding for text: {e}")
|
|
|
9 |
|
10 |
def get_text_embedding(search_query: str):
|
11 |
try:
|
|
|
12 |
text_embedding = model.encode(search_query, convert_to_tensor=True).cpu().numpy().tolist()
|
13 |
+
# logger.info("Text embedding successfully retrieved.")
|
14 |
return text_embedding
|
15 |
except Exception as e:
|
16 |
logger.error(f"Error while getting embedding for text: {e}")
|
src/backend/models/llm_model.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
from groq import Groq
|
3 |
from utils import logger
|
4 |
from data import chroma_db
|
|
|
5 |
from dotenv import load_dotenv
|
6 |
|
7 |
load_dotenv()
|
@@ -25,7 +26,7 @@ def get_medical_assistant_response(prompt: list):
|
|
25 |
if not prompt or len(prompt[0]) < 5:
|
26 |
return "⚠️ Your question seems too short. Please provide more details so I can assist you better."
|
27 |
query = prompt[-1]
|
28 |
-
response =
|
29 |
|
30 |
if response and "metadatas" in response and response["metadatas"]:
|
31 |
retrieved_contexts = [metadata['answer'] for metadata in response["metadatas"][0]]
|
@@ -68,7 +69,7 @@ def get_medical_assistant_request(conversation_history: list):
|
|
68 |
return "⚠️ Please provide more details so I can assist you better."
|
69 |
latest_user_message = conversation_history[-1]["content"]
|
70 |
retrieved_contexts = []
|
71 |
-
chroma_response =
|
72 |
if chroma_response and "metadatas" in chroma_response and chroma_response["metadatas"]:
|
73 |
retrieved_contexts = [metadata['answer'] for metadata in chroma_response["metadatas"][0]]
|
74 |
context = "\n".join(retrieved_contexts[:3]) if retrieved_contexts else "No relevant information found in the database."
|
|
|
2 |
from groq import Groq
|
3 |
from utils import logger
|
4 |
from data import chroma_db
|
5 |
+
from data import pinecone_db
|
6 |
from dotenv import load_dotenv
|
7 |
|
8 |
load_dotenv()
|
|
|
26 |
if not prompt or len(prompt[0]) < 5:
|
27 |
return "⚠️ Your question seems too short. Please provide more details so I can assist you better."
|
28 |
query = prompt[-1]
|
29 |
+
response = pinecone_db.search_vector_store(query)
|
30 |
|
31 |
if response and "metadatas" in response and response["metadatas"]:
|
32 |
retrieved_contexts = [metadata['answer'] for metadata in response["metadatas"][0]]
|
|
|
69 |
return "⚠️ Please provide more details so I can assist you better."
|
70 |
latest_user_message = conversation_history[-1]["content"]
|
71 |
retrieved_contexts = []
|
72 |
+
chroma_response = pinecone_db.search_vector_store(latest_user_message)
|
73 |
if chroma_response and "metadatas" in chroma_response and chroma_response["metadatas"]:
|
74 |
retrieved_contexts = [metadata['answer'] for metadata in chroma_response["metadatas"][0]]
|
75 |
context = "\n".join(retrieved_contexts[:3]) if retrieved_contexts else "No relevant information found in the database."
|
src/backend/models/schemas.py
CHANGED
@@ -6,4 +6,8 @@ class Chat_Response(BaseModel):
|
|
6 |
response: Optional[Dict] = None
|
7 |
|
8 |
class ChatRequest(BaseModel):
|
9 |
-
conversation_history: List[Dict]
|
|
|
|
|
|
|
|
|
|
6 |
response: Optional[Dict] = None
|
7 |
|
8 |
class ChatRequest(BaseModel):
|
9 |
+
conversation_history: List[Dict]
|
10 |
+
|
11 |
+
class Add_Data_In_DB(BaseModel):
|
12 |
+
start: int
|
13 |
+
end: int
|
src/backend/routes/__pycache__/upsert_data.cpython-313.pyc
CHANGED
Binary files a/src/backend/routes/__pycache__/upsert_data.cpython-313.pyc and b/src/backend/routes/__pycache__/upsert_data.cpython-313.pyc differ
|
|
src/backend/routes/upsert_data.py
CHANGED
@@ -1,15 +1,19 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
4 |
|
5 |
-
|
6 |
-
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter,HTTPException
|
2 |
+
from data import dataset
|
3 |
+
from data import pinecone_db
|
4 |
+
from models.schemas import Add_Data_In_DB
|
5 |
|
6 |
+
router = APIRouter()
|
7 |
+
index_name = "question-answering-index"
|
8 |
|
9 |
+
@router.post("/upsert_data")
|
10 |
+
async def upsert_data(add_data: Add_Data_In_DB):
|
11 |
+
|
12 |
+
try:
|
13 |
+
start = add_data.start
|
14 |
+
end = add_data.end
|
15 |
+
df = dataset.get_data_set()[start:end]
|
16 |
+
pinecone_db.upsert_data_in_db(df, index_name)
|
17 |
+
return {"status": "success"}
|
18 |
+
except Exception as e:
|
19 |
+
raise HTTPException(status_code=500, detail=str(e))
|
src/frontend/app/__pycache__/common_fuctions.cpython-313.pyc
CHANGED
Binary files a/src/frontend/app/__pycache__/common_fuctions.cpython-313.pyc and b/src/frontend/app/__pycache__/common_fuctions.cpython-313.pyc differ
|
|
src/frontend/app/__pycache__/homepage.cpython-313.pyc
CHANGED
Binary files a/src/frontend/app/__pycache__/homepage.cpython-313.pyc and b/src/frontend/app/__pycache__/homepage.cpython-313.pyc differ
|
|
src/frontend/app/common_fuctions.py
CHANGED
@@ -3,6 +3,7 @@ import base64
|
|
3 |
import requests
|
4 |
from dotenv import load_dotenv
|
5 |
from utils import logger
|
|
|
6 |
|
7 |
load_dotenv()
|
8 |
logger = logger.get_logger()
|
@@ -27,12 +28,29 @@ def get_api_response(endpoint:str, prompt: list):
|
|
27 |
logger.info(f"Sending user prompt to API endpoint: {API_URL}{endpoint}")
|
28 |
response = requests.post(f"{API_URL}{endpoint}", json={"prompt": prompt})
|
29 |
if response.status_code == 200:
|
30 |
-
return response.json()
|
31 |
else:
|
32 |
return "An error occurred while processing your request."
|
33 |
except Exception as e:
|
34 |
return f"An error occurred while processing your request: {str(e)}"
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
def initialize_conversation():
|
37 |
|
38 |
assistant_message = "Hello! I am Yuvabe Care Companion AI. How can I assist you with your health-related queries today?"
|
|
|
3 |
import requests
|
4 |
from dotenv import load_dotenv
|
5 |
from utils import logger
|
6 |
+
import json
|
7 |
|
8 |
load_dotenv()
|
9 |
logger = logger.get_logger()
|
|
|
28 |
logger.info(f"Sending user prompt to API endpoint: {API_URL}{endpoint}")
|
29 |
response = requests.post(f"{API_URL}{endpoint}", json={"prompt": prompt})
|
30 |
if response.status_code == 200:
|
31 |
+
return response.json()
|
32 |
else:
|
33 |
return "An error occurred while processing your request."
|
34 |
except Exception as e:
|
35 |
return f"An error occurred while processing your request: {str(e)}"
|
36 |
|
37 |
+
|
38 |
+
def upsert_data_request(start, end):
|
39 |
+
headers = {"Content-Type": "application/json"}
|
40 |
+
payload = {
|
41 |
+
"start": start,
|
42 |
+
"end": end
|
43 |
+
}
|
44 |
+
|
45 |
+
try:
|
46 |
+
url = "http://localhost:8000/data/upsert_data"
|
47 |
+
response = requests.post(url, data=json.dumps(payload), headers=headers)
|
48 |
+
return response
|
49 |
+
except requests.exceptions.HTTPError as http_err:
|
50 |
+
print(f"HTTP error occurred: {http_err}")
|
51 |
+
except Exception as err:
|
52 |
+
print(f"An error occurred: {err}")
|
53 |
+
|
54 |
def initialize_conversation():
|
55 |
|
56 |
assistant_message = "Hello! I am Yuvabe Care Companion AI. How can I assist you with your health-related queries today?"
|
src/frontend/app/homepage.py
CHANGED
@@ -60,11 +60,11 @@ def handle_user_input():
|
|
60 |
response = "⚠️ Oops! Something went wrong. Please try again."
|
61 |
|
62 |
with st.chat_message("assistant"):
|
63 |
-
st.markdown(response)
|
64 |
|
65 |
st.session_state.messages.append({"role": "assistant", "content": response})
|
66 |
|
67 |
-
logger.info(f"Assistant response: {response[:100]}...")
|
68 |
|
69 |
# def handle_user_input():
|
70 |
|
|
|
60 |
response = "⚠️ Oops! Something went wrong. Please try again."
|
61 |
|
62 |
with st.chat_message("assistant"):
|
63 |
+
st.markdown(response['response'])
|
64 |
|
65 |
st.session_state.messages.append({"role": "assistant", "content": response})
|
66 |
|
67 |
+
logger.info(f"Assistant response: {response['response'][:100]}...")
|
68 |
|
69 |
# def handle_user_input():
|
70 |
|
src/frontend/pages/database_response_page.py
CHANGED
@@ -19,10 +19,9 @@ if prompt:
|
|
19 |
endpoint = "/chat/db_response"
|
20 |
response = common_fuctions.get_api_response(endpoint, [prompt])
|
21 |
st.subheader("✅ Relevant question and answer pair found in the database.")
|
22 |
-
for
|
23 |
-
|
24 |
-
st.write("
|
25 |
-
st.write("Answer:", entry["answer"])
|
26 |
st.write("-" * 80)
|
27 |
|
28 |
if st.button("Clear chat"):
|
|
|
19 |
endpoint = "/chat/db_response"
|
20 |
response = common_fuctions.get_api_response(endpoint, [prompt])
|
21 |
st.subheader("✅ Relevant question and answer pair found in the database.")
|
22 |
+
for metadata in response:
|
23 |
+
st.write("Question:", metadata["question"])
|
24 |
+
st.write("Answer:", metadata["answer"])
|
|
|
25 |
st.write("-" * 80)
|
26 |
|
27 |
if st.button("Clear chat"):
|
src/frontend/pages/dataloader_page.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from app import common_fuctions
|
2 |
+
import streamlit as st
|
3 |
+
from app import homepage
|
4 |
+
from utils import logger
|
5 |
+
|
6 |
+
logger = logger.get_logger()
|
7 |
+
|
8 |
+
homepage.config_homepage()
|
9 |
+
st.title("Data Loader")
|
10 |
+
|
11 |
+
def load_data():
|
12 |
+
st.sidebar.header("📊 Data Loading Parameters")
|
13 |
+
start_index = st.sidebar.number_input("Select start index", min_value=0, value=0)
|
14 |
+
end_index = st.sidebar.number_input("Select end index", min_value=0, value=100)
|
15 |
+
|
16 |
+
if start_index > end_index:
|
17 |
+
st.sidebar.error("⚠️ Start index must be earlier than the end index.")
|
18 |
+
return
|
19 |
+
|
20 |
+
if "load_clicked" not in st.session_state:
|
21 |
+
st.session_state.load_clicked = False
|
22 |
+
|
23 |
+
try:
|
24 |
+
st.sidebar.info(f"Click the button to load data from index **{start_index} to {end_index}**.")
|
25 |
+
if st.sidebar.button("🚀 Upsert Data", disabled=st.session_state.load_clicked, help="Click to insert data into the database"):
|
26 |
+
st.session_state.load_clicked = True
|
27 |
+
|
28 |
+
with st.spinner("⏳ Upserting data... Please wait"):
|
29 |
+
response = common_fuctions.upsert_data_request(start_index, end_index)
|
30 |
+
st.write(response)
|
31 |
+
# if response.get("status") == "success":
|
32 |
+
# st.success("Data upserted successfully!")
|
33 |
+
# st.session_state.load_clicked = False
|
34 |
+
# else:
|
35 |
+
# st.error("Failed to upsert data.")
|
36 |
+
# logger.error("Failed to upsert data.")
|
37 |
+
# st.session_state.load_clicked = False
|
38 |
+
except Exception as e:
|
39 |
+
st.error(f"Error loading data: {e}")
|
40 |
+
logger.error(f"Error loading data: {e}")
|
41 |
+
st.session_state.load_clicked = False
|
42 |
+
|
43 |
+
load_data()
|