First_RAG_System / optimal_chunker.py
HamidOmarov's picture
Upload 7 files
e02136d verified
raw
history blame
320 Bytes
from langchain.text_splitter import RecursiveCharacterTextSplitter
def chunk_documents(docs, chunk_size=500, chunk_overlap=50):
    """Split documents into smaller chunks.

    Builds a ``RecursiveCharacterTextSplitter`` configured with the given
    size and overlap, then delegates the splitting to its
    ``split_documents`` method.

    Args:
        docs: The documents to split; handed unchanged to
            ``split_documents``.
        chunk_size: Forwarded as the splitter's ``chunk_size``
            (default 500).
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``
            (default 50).

    Returns:
        Whatever ``split_documents`` returns for ``docs``.
    """
    # NOTE(review): sizes are presumably measured in characters (the
    # splitter's default length function) -- confirm against the installed
    # langchain version.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)