Spaces:
Runtime error
Runtime error
File size: 1,114 Bytes
d82600f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
def chunk_text(text: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> list:
    """
    Break text into pieces with a RecursiveCharacterTextSplitter.

    Args:
        text: The text to break into pieces.
        chunk_size: Forwarded to the splitter as its ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as its ``chunk_overlap``.

    Returns:
        The list returned by the splitter's ``split_text``. An empty list
        when ``text`` is falsy or contains only whitespace. If any step
        raises, the error is printed and the untouched ``text`` is returned
        as a one-element list (or an empty list when ``text`` is falsy).
    """
    try:
        # The content check lives inside the try on purpose: a truthy value
        # without a ``strip`` method ends up in the fallback below.
        has_content = bool(text) and bool(text.strip())
        if has_content:
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                length_function=len,
            )
            chunks = splitter.split_text(text)
            print(f"[INFO] Created {len(chunks)} chunks from text")
            return chunks

        print("[WARNING] Empty or None text provided for chunking")
        return []
    except Exception as e:
        print(f"[ERROR] Text chunking failed: {e}")
        # Best-effort fallback: hand the input back as one single chunk.
        if text:
            return [text]
        return []