# Spaces: Runtime error
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken

# Small Streamlit app: splits pasted text with RecursiveCharacterTextSplitter
# and shows each resulting chunk, measuring chunk size either in characters
# or in tokens.

# Retained from the original script; not referenced anywhere in this file.
CHARACTER_LENGTH = "length_function=lambda x: len(x)"

# Tokenizer used when chunk sizes are measured in tokens.
enc = tiktoken.get_encoding("cl100k_base")
# Alias kept so the original module-level name still resolves.
TOKEN_LENGTH = enc


def count_tokens(text):
    """Return the number of ``cl100k_base`` tokens in *text*."""
    return len(enc.encode(text))


# Streamlit UI
st.title("Understand Chunk and Token")

chunk_size = st.number_input(
    label="Chunk Size",
    min_value=1,
    value=1000,
)

# The overlap must stay strictly below the chunk size. The lower bound is 0
# (not 1) so that the default value and the upper bound remain valid for very
# small chunk sizes: with chunk_size == 1 the only legal overlap is 0.
chunk_overlap = st.number_input(
    label="Chunk Overlap",
    min_value=0,
    max_value=chunk_size - 1,
    value=int(chunk_size * 0.2),
)

# "Tokens" is preselected (index=1) because the original script always
# measured length in tokens.
length_function_option = st.selectbox(
    "Length Function",
    ["Characters", "Tokens"],
    index=1,
)

# Map the user's choice to the callable handed to the splitter.
if length_function_option == "Characters":
    length_function = len
else:
    length_function = count_tokens

# Text input
doc = st.text_area("Füge hier deinen Text ein:")

# Button that triggers splitting of the text
if st.button("Split Text"):
    if not doc.strip():
        # Nothing to split; tell the user instead of silently showing nothing.
        st.warning("Bitte zuerst einen Text eingeben.")
    else:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=length_function,
        )

        # Split the text into chunks
        splits = splitter.split_text(doc)

        # Show every chunk in its own text area (labels are unique per index)
        for idx, split in enumerate(splits, start=1):
            st.text_area(f"Teilstück {idx}", split, height=150)