import streamlit as st
def chunk_text(text, chunk_size, overlap):
    """Split *text* into fixed-size character chunks with a sliding overlap.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so for
    a non-negative ``overlap`` each chunk begins with the last ``overlap``
    characters of the chunk before it. The final chunk may be shorter than
    ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must be
            smaller than ``chunk_size``.

    Returns:
        The chunks in order of appearance; an empty list when *text* is empty.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``overlap`` is
            not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    stride = chunk_size - overlap
    if stride <= 0:
        # A non-positive stride never advances through the text; the previous
        # while-loop would spin forever and grow the result without bound.
        raise ValueError("overlap must be smaller than chunk_size")

    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), stride)
    ]
def main():
    """Render the Text Chunker Streamlit page.

    Shows a text area pre-filled with sample text plus two sliders (chunk size
    and overlap). If the overlap is not smaller than the chunk size an error
    is shown and rendering stops. When the "Chunk It!" button is pressed the
    input is split with ``chunk_text`` and each chunk is displayed together
    with its 1-based index and its length.
    """
    # Local import: only needed to escape chunk text before it is rendered.
    import html

    st.set_page_config(page_title="Text Chunker", page_icon="✂️", layout="centered")

    # Custom CSS for styling
    # NOTE(review): both markup strings below contain nothing but a newline;
    # the CSS and title HTML appear to be missing -- confirm intended content.
    st.markdown("\n", unsafe_allow_html=True)
    st.markdown("\n", unsafe_allow_html=True)

    # Example text
    example_text = (
        "Natural language processing (NLP) is a subfield of linguistics, computer science,\n"
        "and artificial intelligence concerned with the interactions between computers and human language.\n"
        "It focuses on how to program computers to process and analyze large amounts of natural language data.\n"
        "The result is a computer capable of understanding natural language in a way that is both meaningful\n"
        "and useful to humans."
    )

    # Inputs
    input_text = st.text_area("Input Text", value=example_text, height=200)
    col1, col2 = st.columns(2)
    with col1:
        chunk_size = st.slider("Chunk Size (characters)", 50, 200, 100, 10)
    with col2:
        overlap = st.slider("Overlap (characters)", 0, 50, 20, 5)

    # The slider ranges meet at 50, so overlap == chunk_size is reachable;
    # chunking with a non-positive step cannot make progress.
    if overlap >= chunk_size:
        st.error("Overlap must be smaller than chunk size!")
        return

    # Processing
    if st.button("Chunk It!", type="primary"):
        chunks = chunk_text(input_text, chunk_size, overlap)
        st.markdown(f"**🔖 {len(chunks)} Chunks Created**")
        for i, chunk in enumerate(chunks, 1):
            with st.container():
                # The chunk is user-supplied text rendered with
                # unsafe_allow_html=True, so escape it to keep characters such
                # as "<" and "&" from being interpreted as markup. The length
                # shown is that of the raw, unescaped chunk.
                st.markdown(
                    f"\nChunk #{i} (Length: {len(chunk)})\n{html.escape(chunk)}\n",
                    unsafe_allow_html=True,
                )
        st.success("✅ Chunking completed! Scroll to see all chunks.")
# Build the page only when this file is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()