Adding sentence transformers and some logs
Browse files
- indexer.py +4 -0
- requirements.txt +1 -0
indexer.py
CHANGED
|
@@ -11,12 +11,15 @@ loader = DirectoryLoader('.', glob="./source/*.html", loader_cls=BSHTMLLoader)
|
|
| 11 |
docs=loader.load()
|
| 12 |
|
| 13 |
#splitting the text into chunks, trying with 1000 size
|
|
|
|
| 14 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 15 |
texts = text_splitter.split_documents(docs)
|
| 16 |
|
| 17 |
#init db and embeddings
|
|
|
|
| 18 |
persist_directory="./index/chroma"
|
| 19 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
|
| 20 |
vectordb = Chroma.from_documents(documents=texts,
|
| 21 |
embedding=embeddings,
|
| 22 |
persist_directory=persist_directory)
|
|
@@ -29,6 +32,7 @@ meta=[{"n":1},{"z":2},{},{},{"n":3},{"n":4}]
|
|
| 29 |
|
| 30 |
#docsearch=FAISS.from_texts(sentences,embeddings,meta,id)
|
| 31 |
#m=docsearch.similarity_search_with_score(query2,filter={"n":2})
|
|
|
|
| 32 |
query="How to Increase Flexibility Without Losing Productivity"
|
| 33 |
docs = vectordb.similarity_search(query)
|
| 34 |
for i in range(0,len(docs)):
|
|
|
|
| 11 |
docs=loader.load()
|
| 12 |
|
| 13 |
#splitting the text into chunks, trying with 1000 size
|
| 14 |
+
print("splitting to chunks")
|
| 15 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 16 |
texts = text_splitter.split_documents(docs)
|
| 17 |
|
| 18 |
#init db and embeddings
|
| 19 |
+
print("Creating embeddings")
|
| 20 |
persist_directory="./index/chroma"
|
| 21 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 22 |
+
print("Storing in db")
|
| 23 |
vectordb = Chroma.from_documents(documents=texts,
|
| 24 |
embedding=embeddings,
|
| 25 |
persist_directory=persist_directory)
|
|
|
|
| 32 |
|
| 33 |
#docsearch=FAISS.from_texts(sentences,embeddings,meta,id)
|
| 34 |
#m=docsearch.similarity_search_with_score(query2,filter={"n":2})
|
| 35 |
+
print("Querying db")
|
| 36 |
query="How to Increase Flexibility Without Losing Productivity"
|
| 37 |
docs = vectordb.similarity_search(query)
|
| 38 |
for i in range(0,len(docs)):
|
requirements.txt
CHANGED
|
@@ -6,5 +6,6 @@ fastapi
|
|
| 6 |
loguru
|
| 7 |
chromadb
|
| 8 |
langchain
|
|
|
|
| 9 |
sse_starlette
|
| 10 |
dropbox
|
|
|
|
| 6 |
loguru
|
| 7 |
chromadb
|
| 8 |
langchain
|
| 9 |
+
sentence_transformers
|
| 10 |
sse_starlette
|
| 11 |
dropbox
|