Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -66,6 +66,26 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
|
|
66 |
def get_embeddings():
    """Create the Hugging Face embedding model.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/stsb-roberta-large`` model.
    """
    model_name = "sentence-transformers/stsb-roberta-large"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return embeddings
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def update_vectors(files, parser):
|
70 |
global uploaded_documents
|
71 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
@@ -111,6 +131,9 @@ def update_vectors(files, parser):
|
|
111 |
database.save_local("faiss_database")
|
112 |
logging.info("FAISS database saved")
|
113 |
|
|
|
|
|
|
|
114 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
|
115 |
choices=[doc["name"] for doc in uploaded_documents],
|
116 |
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|
|
|
66 |
def get_embeddings():
    """Create the Hugging Face embedding model.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/stsb-roberta-large`` model.
    """
    model_name = "sentence-transformers/stsb-roberta-large"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return embeddings
|
68 |
|
69 |
+
def scan_faiss_database():
    """Rebuild the global ``uploaded_documents`` list from the saved FAISS index.

    The list is reset on every call. If the ``faiss_database`` directory
    exists, the index is loaded and one entry is produced per distinct file
    name found in the ``source`` metadata of the stored documents, in
    first-seen order. Every entry is marked as selected.

    Returns:
        The rebuilt list of ``{"name": <file name>, "selected": True}``
        dicts (the same object that is bound to the module-level
        ``uploaded_documents``). Empty when no database directory exists.
    """
    global uploaded_documents
    uploaded_documents = []

    if not os.path.exists("faiss_database"):
        return uploaded_documents

    # NOTE(security): allow_dangerous_deserialization=True unpickles the stored
    # docstore. Presumably acceptable because this application writes the
    # index itself via save_local -- never point this at an untrusted directory.
    database = FAISS.load_local(
        "faiss_database",
        get_embeddings(),  # shared factory keeps the model name in one place
        allow_dangerous_deserialization=True,
    )

    seen_names = set()  # O(1) duplicate check instead of rescanning the list
    skipped = 0
    for doc in database.docstore._dict.values():
        # Chunks are expected to carry a 'source' metadata field holding the
        # file path; tolerate chunks without it rather than crash at start-up.
        source = (doc.metadata or {}).get("source")
        if not source:
            skipped += 1
            continue
        file_name = os.path.basename(source)
        if file_name not in seen_names:
            seen_names.add(file_name)
            uploaded_documents.append({"name": file_name, "selected": True})

    if skipped:
        logging.warning(
            "scan_faiss_database: skipped %d stored chunks without 'source' metadata",
            skipped,
        )

    return uploaded_documents
|
84 |
+
|
85 |
+
# Populate the document list from any previously saved FAISS index at application start-up
|
86 |
+
uploaded_documents = scan_faiss_database()
|
87 |
+
|
88 |
+
|
89 |
def update_vectors(files, parser):
|
90 |
global uploaded_documents
|
91 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
|
|
131 |
database.save_local("faiss_database")
|
132 |
logging.info("FAISS database saved")
|
133 |
|
134 |
+
# After processing new files, rescan the database
|
135 |
+
uploaded_documents = scan_faiss_database()
|
136 |
+
|
137 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
|
138 |
choices=[doc["name"] for doc in uploaded_documents],
|
139 |
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|