Spaces:
Runtime error
Runtime error
taka-yamakoshi
committed on
Commit
·
dc80c0d
1
Parent(s):
ed9112c
minor update on instructions
Browse files
app.py
CHANGED
|
@@ -98,7 +98,8 @@ if __name__=='__main__':
|
|
| 98 |
st.markdown(generate_markdown('quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
|
| 99 |
|
| 100 |
# Select and load the tokenizer
|
| 101 |
-
|
|
|
|
| 102 |
('bert-base-uncased','bert-large-cased',
|
| 103 |
'gpt2','gpt2-large',
|
| 104 |
'roberta-base','roberta-large',
|
|
@@ -106,11 +107,11 @@ if __name__=='__main__':
|
|
| 106 |
tokenizer = load_model(tokenizer_name)
|
| 107 |
|
| 108 |
st.sidebar.write('2. Optional settings')
|
| 109 |
-
st.sidebar.write(f'"Compare two texts" compares # tokens for two pieces of text '\
|
| 110 |
-
+f'and "de-tokenize" converts a list of tokenized indices back to strings.')
|
| 111 |
-
st.sidebar.write(f'For "de-tokenize", make sure to type in integers, separated by single spaces')
|
| 112 |
comparison_mode = st.sidebar.checkbox('Compare two texts')
|
| 113 |
detokenize = st.sidebar.checkbox('de-tokenize')
|
|
|
|
|
|
|
|
|
|
| 114 |
if comparison_mode:
|
| 115 |
sent_cols = st.columns(2)
|
| 116 |
num_tokens = {}
|
|
|
|
| 98 |
st.markdown(generate_markdown('quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
|
| 99 |
|
| 100 |
# Select and load the tokenizer
|
| 101 |
+
st.sidebar.write('1. Choose the tokenizer from below')
|
| 102 |
+
tokenizer_name = st.sidebar.selectbox('',
|
| 103 |
('bert-base-uncased','bert-large-cased',
|
| 104 |
'gpt2','gpt2-large',
|
| 105 |
'roberta-base','roberta-large',
|
|
|
|
| 107 |
tokenizer = load_model(tokenizer_name)
|
| 108 |
|
| 109 |
st.sidebar.write('2. Optional settings')
|
|
|
|
|
|
|
|
|
|
| 110 |
comparison_mode = st.sidebar.checkbox('Compare two texts')
|
| 111 |
detokenize = st.sidebar.checkbox('de-tokenize')
|
| 112 |
+
st.sidebar.write(f'"Compare two texts" compares # tokens for two pieces of text '\
|
| 113 |
+
+f'and "de-tokenize" converts a list of tokenized indices back to strings.')
|
| 114 |
+
st.sidebar.write(f'For "de-tokenize", make sure to type in integers, separated by single spaces.')
|
| 115 |
if comparison_mode:
|
| 116 |
sent_cols = st.columns(2)
|
| 117 |
num_tokens = {}
|