Spaces:
Sleeping
Sleeping
Update pipeline.py
Browse files- pipeline.py +2 -10
pipeline.py
CHANGED
|
@@ -188,9 +188,6 @@ def classify_query(query: str) -> str:
|
|
| 188 |
################################################################################
|
| 189 |
|
| 190 |
def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
| 191 |
-
"""
|
| 192 |
-
Builds or loads a FAISS vector store for CSV documents containing 'Question' and 'Answers'.
|
| 193 |
-
"""
|
| 194 |
try:
|
| 195 |
if os.path.exists(store_dir):
|
| 196 |
print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
|
|
@@ -202,22 +199,18 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
|
| 202 |
df = pd.read_csv(csv_path)
|
| 203 |
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
|
| 204 |
df.columns = df.columns.str.strip()
|
| 205 |
-
|
| 206 |
-
# Fix possible column name variations
|
| 207 |
if "Answer" in df.columns:
|
| 208 |
df.rename(columns={"Answer": "Answers"}, inplace=True)
|
| 209 |
if "Question" not in df.columns and "Question " in df.columns:
|
| 210 |
df.rename(columns={"Question ": "Question"}, inplace=True)
|
| 211 |
if "Question" not in df.columns or "Answers" not in df.columns:
|
| 212 |
raise ValueError("CSV must have 'Question' and 'Answers' columns.")
|
| 213 |
-
|
| 214 |
docs = []
|
| 215 |
for _, row in df.iterrows():
|
| 216 |
q = str(row["Question"])
|
| 217 |
ans = str(row["Answers"])
|
| 218 |
doc = Document(page_content=ans, metadata={"question": q})
|
| 219 |
docs.append(doc)
|
| 220 |
-
|
| 221 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
|
| 222 |
vectorstore = FAISS.from_documents(docs, embedding=embeddings)
|
| 223 |
vectorstore.save_local(store_dir)
|
|
@@ -227,11 +220,11 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
|
| 227 |
raise RuntimeError(f"Error building/loading vector store: {str(e)}")
|
| 228 |
|
| 229 |
def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
|
| 230 |
-
"""Build RAG chain using the Gemini LLM."""
|
| 231 |
try:
|
| 232 |
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
|
| 233 |
chain = RetrievalQA.from_chain_type(
|
| 234 |
-
llm=gemini_llm,
|
| 235 |
chain_type="stuff",
|
| 236 |
retriever=retriever,
|
| 237 |
return_source_documents=True
|
|
@@ -239,7 +232,6 @@ def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
|
|
| 239 |
return chain
|
| 240 |
except Exception as e:
|
| 241 |
raise RuntimeError(f"Error building RAG chain: {str(e)}")
|
| 242 |
-
|
| 243 |
################################################################################
|
| 244 |
# Web Search Caching: Separate FAISS Vector Store
|
| 245 |
################################################################################
|
|
|
|
| 188 |
################################################################################
|
| 189 |
|
| 190 |
def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
|
|
|
|
|
|
|
|
|
| 191 |
try:
|
| 192 |
if os.path.exists(store_dir):
|
| 193 |
print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
|
|
|
|
| 199 |
df = pd.read_csv(csv_path)
|
| 200 |
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
|
| 201 |
df.columns = df.columns.str.strip()
|
|
|
|
|
|
|
| 202 |
if "Answer" in df.columns:
|
| 203 |
df.rename(columns={"Answer": "Answers"}, inplace=True)
|
| 204 |
if "Question" not in df.columns and "Question " in df.columns:
|
| 205 |
df.rename(columns={"Question ": "Question"}, inplace=True)
|
| 206 |
if "Question" not in df.columns or "Answers" not in df.columns:
|
| 207 |
raise ValueError("CSV must have 'Question' and 'Answers' columns.")
|
|
|
|
| 208 |
docs = []
|
| 209 |
for _, row in df.iterrows():
|
| 210 |
q = str(row["Question"])
|
| 211 |
ans = str(row["Answers"])
|
| 212 |
doc = Document(page_content=ans, metadata={"question": q})
|
| 213 |
docs.append(doc)
|
|
|
|
| 214 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
|
| 215 |
vectorstore = FAISS.from_documents(docs, embedding=embeddings)
|
| 216 |
vectorstore.save_local(store_dir)
|
|
|
|
| 220 |
raise RuntimeError(f"Error building/loading vector store: {str(e)}")
|
| 221 |
|
| 222 |
def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
|
| 223 |
+
"""Build RAG chain using the Gemini LLM directly without a custom class."""
|
| 224 |
try:
|
| 225 |
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
|
| 226 |
chain = RetrievalQA.from_chain_type(
|
| 227 |
+
llm=gemini_llm, # Directly use the ChatGoogleGenerativeAI instance
|
| 228 |
chain_type="stuff",
|
| 229 |
retriever=retriever,
|
| 230 |
return_source_documents=True
|
|
|
|
| 232 |
return chain
|
| 233 |
except Exception as e:
|
| 234 |
raise RuntimeError(f"Error building RAG chain: {str(e)}")
|
|
|
|
| 235 |
################################################################################
|
| 236 |
# Web Search Caching: Separate FAISS Vector Store
|
| 237 |
################################################################################
|