Spaces:
Build error
Build error
Make tokenizer more robust
Browse files
app.py
CHANGED
|
@@ -36,9 +36,13 @@ def count_string_tokens(string: str, model: str) -> int:
|
|
| 36 |
try:
|
| 37 |
encoding = tiktoken.encoding_for_model(model.split('/')[-1])
|
| 38 |
except:
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
print(f"Model {model} not found. Using cl100k_base encoding.")
|
| 43 |
encoding = tiktoken.get_encoding("cl100k_base")
|
| 44 |
return len(encoding.encode(string))
|
|
@@ -179,7 +183,7 @@ with gr.Blocks(css="""
|
|
| 179 |
max_price = gr.Slider(label="Max Price per Input Token", minimum=0, maximum=0.001, step=0.00001, value=0.001)
|
| 180 |
litellm_provider = gr.Dropdown(label="Inference Provider", choices=["Any"] + TOKEN_COSTS['litellm_provider'].unique().tolist(), value="Any")
|
| 181 |
|
| 182 |
-
model = gr.Dropdown(label="Models (at least 1)", choices=TOKEN_COSTS['model'].tolist(), value="anyscale/meta-llama/Meta-Llama-3-8B-Instruct", multiselect=True)
|
| 183 |
|
| 184 |
gr.Markdown("## Resulting Costs 👇")
|
| 185 |
|
|
|
|
| 36 |
try:
|
| 37 |
encoding = tiktoken.encoding_for_model(model.split('/')[-1])
|
| 38 |
except:
|
| 39 |
+
if len(model.split('/')) > 1:
|
| 40 |
+
try:
|
| 41 |
+
encoding = tiktoken.encoding_for_model(model.split('/')[-2] + '/' + model.split('/')[-1])
|
| 42 |
+
except KeyError:
|
| 43 |
+
print(f"Model {model} not found. Using cl100k_base encoding.")
|
| 44 |
+
encoding = tiktoken.get_encoding("cl100k_base")
|
| 45 |
+
else:
|
| 46 |
print(f"Model {model} not found. Using cl100k_base encoding.")
|
| 47 |
encoding = tiktoken.get_encoding("cl100k_base")
|
| 48 |
return len(encoding.encode(string))
|
|
|
|
| 183 |
max_price = gr.Slider(label="Max Price per Input Token", minimum=0, maximum=0.001, step=0.00001, value=0.001)
|
| 184 |
litellm_provider = gr.Dropdown(label="Inference Provider", choices=["Any"] + TOKEN_COSTS['litellm_provider'].unique().tolist(), value="Any")
|
| 185 |
|
| 186 |
+
model = gr.Dropdown(label="Models (at least 1)", choices=TOKEN_COSTS['model'].tolist(), value=["anyscale/meta-llama/Meta-Llama-3-8B-Instruct", "gpt-4o", "claude-3-sonnet-20240229"], multiselect=True)
|
| 187 |
|
| 188 |
gr.Markdown("## Resulting Costs 👇")
|
| 189 |
|