Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,56 +1,32 @@
|
|
1 |
import streamlit as st
|
2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
-
import tiktoken
|
4 |
-
|
5 |
-
|
6 |
-
CHARACTER_LENGTH = "length_function=lambda x: len(x)"
|
7 |
-
TOKEN_LENGTH = enc = tiktoken.get_encoding("cl100k_base")
|
8 |
-
length_function = lambda text: len(enc.encode(text))
|
9 |
-
|
10 |
-
# Streamlit UI
|
11 |
-
st.title("Understand Chunk and Token")
|
12 |
-
|
13 |
|
|
|
14 |
|
15 |
chunk_size = st.number_input(
|
16 |
min_value=1,
|
17 |
label="Chunk Size",
|
18 |
-
value=
|
19 |
)
|
20 |
|
21 |
-
|
22 |
chunk_overlap = st.number_input(
|
23 |
min_value=1,
|
24 |
max_value=chunk_size - 1,
|
25 |
label="Chunk Overlap",
|
26 |
-
value=
|
27 |
)
|
28 |
|
|
|
29 |
|
30 |
-
|
31 |
-
"Length Function",
|
32 |
-
["Characters", "Tokens"]
|
33 |
-
)
|
34 |
-
|
35 |
-
|
36 |
-
length_function_option = Tokens
|
37 |
-
|
38 |
-
# Text-Eingabe
|
39 |
-
doc = st.text_area("Füge hier deinen Text ein:")
|
40 |
-
|
41 |
-
# Button zum Splitten des Textes
|
42 |
-
if st.button("Split Text"):
|
43 |
|
44 |
splitter = RecursiveCharacterTextSplitter(
|
45 |
chunk_size=chunk_size,
|
46 |
chunk_overlap=chunk_overlap,
|
47 |
-
length_function=
|
48 |
)
|
49 |
|
50 |
-
|
51 |
-
# Aufteilen des Textes
|
52 |
-
splits = splitter.split_text(doc)
|
53 |
|
54 |
-
# Ausgabe der erstellten Textsplitter
|
55 |
for idx, split in enumerate(splits, start=1):
|
56 |
-
st.text_area(f"
|
|
|
1 |
"""Streamlit demo that visualises how a recursive character splitter chunks text.

The user picks a chunk size and an overlap, pastes some text, and each
resulting chunk is rendered in its own text area.
"""

import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Preferred starting values for the two numeric inputs.
DEFAULT_CHUNK_SIZE = 50
DEFAULT_CHUNK_OVERLAP = 10

st.title("Understand Chunking")

chunk_size = st.number_input(
    label="Chunk Size",
    min_value=1,
    value=DEFAULT_CHUNK_SIZE,
)

# The overlap has to stay strictly below the chunk size.  The bounds and the
# default are derived from the current chunk size so the widget never ends up
# with min_value > max_value or a default above max_value (both of which make
# Streamlit raise), e.g. when the chunk size is set to 10 or less.
max_overlap = chunk_size - 1
chunk_overlap = st.number_input(
    label="Chunk Overlap",
    min_value=0,
    max_value=max_overlap,
    value=min(DEFAULT_CHUNK_OVERLAP, max_overlap),
)

docs = st.text_area("Put your text:")

if st.button("Split"):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,  # measure chunk length in characters
    )
    splits = splitter.split_text(docs)

    # One read-only-style text area per chunk, numbered from 1.
    for idx, split in enumerate(splits, start=1):
        st.text_area(f"Chunk {idx}", split)
|