Spaces:

jarif
/

AI-Powered-PDF-Document-Search-and-QA

Sleeping

App Files Community

jarif committed on Aug 25, 2024

Commit

96d0b2f

verified ·

1 Parent(s): ada8299

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -22

app.py CHANGED Viewed

@@ -1,30 +1,18 @@
 import os
-import faiss
 import logging
 import streamlit as st
 from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
 from langchain.chains import RetrievalQA
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
-def load_faiss_index(index_path):
-    if not os.path.exists(index_path):
-        logging.error(f"FAISS index not found at {index_path}. Please create the index first.")
-        st.error(f"FAISS index not found at {index_path}. Please create the index first.")
-        raise FileNotFoundError(f"FAISS index not found at {index_path}.")
-    try:
-        logging.info(f"Attempting to load FAISS index from {index_path}.")
-        index = faiss.read_index(index_path)
-        logging.info("FAISS index loaded successfully.")
-        st.success("FAISS index loaded successfully.")
-        return index
-    except Exception as e:
-        logging.error(f"Failed to load FAISS index: {e}")
-        st.error(f"Failed to load FAISS index: {e}")
-        raise
 def load_llm():
     checkpoint = "LaMini-T5-738M"
@@ -42,16 +30,13 @@ def load_llm():
     return pipe
 def process_answer(question):
-    index_path = 'faiss_index/index.faiss'
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     try:
-        faiss_index = load_faiss_index(index_path)
-        retriever = FAISS(index=faiss_index, embeddings=embeddings)
         llm = load_llm()
         qa = RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
-            retriever=retriever,
             return_source_documents=True
         )
         result = qa.invoke(question)

 import os
 import logging
 import streamlit as st
 from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import Chroma
 from langchain.chains import RetrievalQA
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
+def load_vector_store():
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
+    return vector_store
 def load_llm():
     checkpoint = "LaMini-T5-738M"
     return pipe
 def process_answer(question):
     try:
+        vector_store = load_vector_store()
         llm = load_llm()
         qa = RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
+            retriever=vector_store.as_retriever(),
             return_source_documents=True
         )
         result = qa.invoke(question)