Ajey95 committed on
Commit
01d70c4
·
1 Parent(s): a34718b

Fix: tools addition

Browse files
Files changed (2) hide show
  1. requirements.txt +0 -0
  2. utils/helpers.py +36 -0
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
utils/helpers.py CHANGED
@@ -8,6 +8,42 @@ import os
8
  import random
9
  from datetime import datetime
10
  from zoneinfo import ZoneInfo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def load_quotes():
13
  """Load inspirational quotes from Gita/Vedas"""
 
8
  import random
9
  from datetime import datetime
10
  from zoneinfo import ZoneInfo
11
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
14
+ from langchain_community.vectorstores import FAISS
15
+
16
def create_vector_store():
    """Build the FAISS knowledge-base index from local PDFs if it is missing.

    PDFs are read from ``./knowledge_base/``, split into overlapping text
    chunks, embedded with Google Generative AI embeddings, and the resulting
    FAISS index is saved to the ``faiss_index`` directory.

    The function is a no-op (apart from a status message) when:

    * a saved index file is already present in ``faiss_index``;
    * the ``knowledge_base`` folder is missing or empty;
    * no documents could be loaded from the folder;
    * the loaded documents yield no text chunks to embed.

    Note that this function never loads an existing index; it only makes
    sure one has been written to disk.

    Returns:
        None
    """
    persist_directory = 'faiss_index'
    knowledge_base_dir = "./knowledge_base"

    # Test for the saved index file rather than the bare directory: an empty
    # or half-written 'faiss_index' folder (e.g. left behind by an earlier
    # failed run) must not block index creation forever.
    # NOTE(review): relies on FAISS.save_local's default index name "index".
    index_file = os.path.join(persist_directory, "index.faiss")
    if os.path.isfile(index_file):
        print("--- Knowledge base (FAISS index) already exists. Loading... ---")
        return

    # Check if there are files to process. isdir() (not exists()) so that a
    # stray regular file named 'knowledge_base' does not make listdir() raise.
    if not os.path.isdir(knowledge_base_dir) or not os.listdir(knowledge_base_dir):
        print("--- 'knowledge_base' folder is empty or does not exist. Skipping index creation. ---")
        return

    print("--- Creating new knowledge base... ---")
    loader = PyPDFDirectoryLoader("./knowledge_base/")
    documents = loader.load()
    if not documents:
        print("--- No documents could be loaded. Skipping index creation. ---")
        return

    print(f"--- Loaded {len(documents)} document(s). Splitting text... ---")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)
    if not docs:
        # Pages without extractable text (e.g. scanned images) produce no
        # chunks; building a vector store from an empty list would fail.
        print("--- No text chunks could be extracted. Skipping index creation. ---")
        return

    print("--- Creating embeddings and vector store. This may take a moment... ---")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = FAISS.from_documents(docs, embeddings)
    db.save_local(persist_directory)
    print("--- Knowledge base created successfully. ---")
47
 
48
  def load_quotes():
49
  """Load inspirational quotes from Gita/Vedas"""