Spaces:
Sleeping
Sleeping
File size: 416 Bytes
95305d3 |
1 2 3 4 5 6 7 8 9 10 11 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
def PreprocessingData(documents, chunk_size=1500, chunk_overlap=40):
    """Split ``documents`` into smaller chunks ready for embedding.

    Args:
        documents: Documents to split; passed unchanged to the splitter's
            ``split_documents`` method.
        chunk_size: Forwarded as the splitter's ``chunk_size`` setting
            (default 1500).
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``
            setting (default 40).

    Returns:
        The chunked documents exactly as produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    # Gather the splitter configuration in one place before building it.
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)
|