Sebbe33 committed on
Commit
17b7b74
·
verified ·
1 Parent(s): 2737fb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -38
app.py CHANGED
@@ -1,32 +1,18 @@
1
  import streamlit as st
2
- from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, Language
3
  import tiktoken
4
 
5
 
6
  CHARACTER_LENGTH = "length_function=lambda x: len(x)"
7
  TOKEN_LENGTH = """enc = tiktoken.get_encoding("cl100k_base")
8
  length_function = lambda text: len(enc.encode(text))
9
- """
10
- CHARACTER = """CharacterTextSplitter(
11
- separator="\\n\\n",
12
- chunk_size={chunk_size},
13
- chunk_overlap={chunk_overlap},
14
- length_function={length_function}
15
- )
16
- """
17
  RECURSIVE_CHARACTER = """RecursiveCharacterTextSplitter(
18
  chunk_size={chunk_size},
19
  chunk_overlap={chunk_overlap},
20
  length_function={length_function}
21
  )
22
- """
23
- LANGUAGE = """RecursiveCharacterTextSplitter.from_language(
24
- language="{language}",
25
- chunk_size={chunk_size},
26
- chunk_overlap={chunk_overlap},
27
- length_function={length_function}
28
- )
29
- """
30
 
31
  # Streamlit UI
32
  st.title("Understand Chunk and Token")
@@ -77,30 +63,13 @@ doc = st.text_area("Füge hier deinen Text ein:")
77
 
78
  # Button zum Splitten des Textes
79
  if st.button("Split Text"):
80
- # Erzeugen des Splitter-Objekts basierend auf der Auswahl
81
- if splitter_choice == "Character":
82
- splitter = CharacterTextSplitter(
83
- separator="\n\n",
84
- chunk_size=chunk_size,
85
- chunk_overlap=chunk_overlap,
86
- length_function=length_function
87
- )
88
- elif splitter_choice == "RecursiveCharacter":
89
- splitter = RecursiveCharacterTextSplitter(
90
- chunk_size=chunk_size,
91
- chunk_overlap=chunk_overlap,
92
- length_function=length_function
93
- )
94
- elif "Language." in splitter_choice:
95
- lang = splitter_choice.split(".")[1].lower()
96
- splitter = RecursiveCharacterTextSplitter.from_language(
97
- language=lang,
98
  chunk_size=chunk_size,
99
  chunk_overlap=chunk_overlap,
100
  length_function=length_function
101
- )
102
- else:
103
- raise ValueError("Ungültige Wahl beim Text Splitter.")
104
 
105
  # Aufteilen des Textes
106
  splits = splitter.split_text(doc)
 
1
  import streamlit as st
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
  import tiktoken
4
 
5
 
6
  CHARACTER_LENGTH = "length_function=lambda x: len(x)"
7
  TOKEN_LENGTH = """enc = tiktoken.get_encoding("cl100k_base")
8
  length_function = lambda text: len(enc.encode(text))
9
+
 
 
 
 
 
 
 
10
  RECURSIVE_CHARACTER = """RecursiveCharacterTextSplitter(
11
  chunk_size={chunk_size},
12
  chunk_overlap={chunk_overlap},
13
  length_function={length_function}
14
  )
15
+
 
 
 
 
 
 
 
16
 
17
  # Streamlit UI
18
  st.title("Understand Chunk and Token")
 
63
 
64
  # Button zum Splitten des Textes
65
  if st.button("Split Text"):
66
+
67
+ splitter = RecursiveCharacterTextSplitter(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  chunk_size=chunk_size,
69
  chunk_overlap=chunk_overlap,
70
  length_function=length_function
71
+ )
72
+
 
73
 
74
  # Aufteilen des Textes
75
  splits = splitter.split_text(doc)