File size: 416 Bytes
95305d3
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
from langchain.text_splitter import RecursiveCharacterTextSplitter

def PreprocessingData(documents, chunk_size=1500, chunk_overlap=40):
    """Break the given documents into smaller chunks ready for embedding.

    Args:
        documents: The documents handed to the splitter's
            ``split_documents`` method.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size`` (defaults to 1500).
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap`` (defaults to 40).

    Returns:
        The result of ``split_documents(documents)`` on the configured
        splitter.
    """
    # Gather the splitter configuration in one place before constructing it.
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)