k3ybladewielder committed on
Commit
86215d1
·
verified ·
1 Parent(s): 7aede72

Upload functions.py

Browse files
Files changed (1) hide show
  1. functions.py +43 -0
functions.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #----------- SETUP -----------
2
+ from langchain_community.document_loaders import WebBaseLoader
3
+ from langchain_text_splitters import CharacterTextSplitter
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from dotenv import load_dotenv
7
+ import os
8
+ import logging
9
# Hide log records below ERROR coming from the text splitter. The splitter is
# imported from the `langchain_text_splitters` package, so that package's
# logger is quieted as well; the legacy logger name is kept for older installs.
logging.getLogger("langchain.text_splitter").setLevel(logging.ERROR)
logging.getLogger("langchain_text_splitters").setLevel(logging.ERROR)
10
+ import warnings
11
+ warnings.filterwarnings("ignore")
12
+ import yaml
13
+
14
+ # ----------- PARAMS -----------
15
# Read the run-time settings from config.yaml, resolved against the current
# working directory.
with open('./config.yaml', 'r', encoding='utf-8') as file:
    # An empty YAML document parses to None; fall back to an empty mapping so
    # the lookups below yield None instead of raising AttributeError.
    config = yaml.safe_load(file) or {}

# Every setting is None when its key is absent from the configuration file.
EMBEDDING_MODEL = config.get('EMBEDDING_MODEL')
LLM_MODEL = config.get('LLM_MODEL')
REBUILD_VECTOR_STORE = config.get('REBUILD_VECTOR_STORE')
CHUNK_SIZE = config.get('CHUNK_SIZE')
CHUNK_OVERLAP = config.get('CHUNK_OVERLAP')
CACHE_FOLDER = config.get('CACHE_FOLDER')
URL_LIST = config.get('URL_LIST')
VS_BASE = config.get('VS_BASE')
25
+
26
+ # ----------- VECTOR STORE CREATION -----------
27
+ def fn_rebuild_vector_store(REBUILD_VECTOR_STORE, URL_LIST, VS_BASE, EMBEDDING_MODEL, CACHE_FOLDER, CHUNK_SIZE, CHUNK_OVERLAP):
28
+ if REBUILD_VECTOR_STORE:
29
+ print("[INFO] REBUILD_VECTOR_STORE was set True. Recreating the vector store...")
30
+ loader = WebBaseLoader(web_paths=URL_LIST)
31
+ docs = loader.load()
32
+ text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
33
+ split_docs = text_splitter.split_documents(docs)
34
+ embeddings = HuggingFaceEmbeddings(
35
+ model_name=EMBEDDING_MODEL,
36
+ cache_folder=CACHE_FOLDER)
37
+ vector_store = FAISS.from_documents(split_docs, embeddings)
38
+ os.makedirs(VS_BASE, exist_ok=True)
39
+ vector_store.save_local(VS_BASE)
40
+ print(f"[INFO] Vector Store saved in the path: {VS_BASE}")
41
+ else:
42
+ print("[INFO] REBUILD_VECTOR_STORE was set False. Using the current vector store...")
43
+ return print(f"[INFO] End of vector store process")