"""Streamlit demo app that splits text into fixed-size, overlapping chunks."""

import html

import streamlit as st


def chunk_text(text: str, chunk_size: int, overlap: int) -> list:
    """Split *text* into consecutive character windows that overlap.

    Each chunk starts ``chunk_size - overlap`` characters after the previous
    one and is at most ``chunk_size`` characters long; chunks near the end of
    the text may be shorter.

    Args:
        text: The string to split. An empty string yields an empty list.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        The list of chunks in the order they occur in *text*.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is
            outside ``[0, chunk_size)``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # A step of zero or less would never advance through the text (the
    # original loop spun forever); a negative overlap would silently skip
    # characters between chunks.
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), step)
    ]


def main() -> None:
    """Render the Text Chunker page and display chunks on request."""
    st.set_page_config(page_title="Text Chunker", page_icon="✂️", layout="centered")

    # Custom CSS for styling
    st.markdown(""" """, unsafe_allow_html=True)

    # Written with explicit "\n" escapes: a single-quoted literal cannot span
    # several physical lines.
    st.markdown("\n\n✂️ Text Chunker\n\n", unsafe_allow_html=True)

    # Example text
    example_text = (
        "Natural language processing (NLP) is a subfield of linguistics, "
        "computer science, and artificial intelligence concerned with the "
        "interactions between computers and human language. It focuses on "
        "how to program computers to process and analyze large amounts of "
        "natural language data. The result is a computer capable of "
        "understanding natural language in a way that is both meaningful "
        "and useful to humans."
    )

    # Inputs
    input_text = st.text_area("Input Text", value=example_text, height=200)
    col1, col2 = st.columns(2)
    with col1:
        chunk_size = st.slider("Chunk Size (characters)", 50, 200, 100, 10)
    with col2:
        overlap = st.slider("Overlap (characters)", 0, 50, 20, 5)

    # The slider ranges meet at 50, so overlap == chunk_size is reachable.
    if overlap >= chunk_size:
        st.error("Overlap must be smaller than chunk size!")
        return

    # Processing
    if st.button("Chunk It!", type="primary"):
        chunks = chunk_text(input_text, chunk_size, overlap)
        st.markdown(f"**🔖 {len(chunks)} Chunks Created**")
        for i, chunk in enumerate(chunks, 1):
            with st.container():
                # The chunk is user-supplied text rendered with raw HTML
                # enabled, so escape it; the reported length is that of the
                # unescaped chunk.
                st.markdown(
                    f"\n\nChunk #{i} (Length: {len(chunk)})\n\n\n"
                    f"{html.escape(chunk)}\n\n",
                    unsafe_allow_html=True,
                )
        st.success("✅ Chunking completed! Scroll to see all chunks.")


if __name__ == "__main__":
    main()