Spaces:
Runtime error
Runtime error
Commit
·
3a67ca3
1
Parent(s):
1bc48d2
Initial commit
Browse files- README.md +0 -2
- tokens_per_byte.py +7 -7
README.md
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
---
|
| 2 |
title: Tokens per Byte
|
| 3 |
-
datasets:
|
| 4 |
-
-
|
| 5 |
tags:
|
| 6 |
- evaluate
|
| 7 |
- measurement
|
|
|
|
| 1 |
---
|
| 2 |
title: Tokens per Byte
|
|
|
|
|
|
|
| 3 |
tags:
|
| 4 |
- evaluate
|
| 5 |
- measurement
|
tokens_per_byte.py
CHANGED
|
@@ -71,8 +71,7 @@ class TokensperByte(evaluate.Measurement):
|
|
| 71 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 72 |
# This defines the format of each prediction and reference
|
| 73 |
features=datasets.Features({
|
| 74 |
-
'
|
| 75 |
-
'references': datasets.Value('int64'),
|
| 76 |
}),
|
| 77 |
# Homepage of the module for documentation
|
| 78 |
homepage="http://module.homepage",
|
|
@@ -86,10 +85,11 @@ class TokensperByte(evaluate.Measurement):
|
|
| 86 |
# TODO: Download external resources if needed
|
| 87 |
pass
|
| 88 |
|
| 89 |
-
def _compute(self,
|
| 90 |
"""Returns the scores"""
|
| 91 |
-
|
| 92 |
-
|
|
|
|
| 93 |
return {
|
| 94 |
-
"
|
| 95 |
-
}
|
|
|
|
| 71 |
inputs_description=_KWARGS_DESCRIPTION,
|
| 72 |
# This defines the format of each prediction and reference
|
| 73 |
features=datasets.Features({
|
| 74 |
+
'text':datasets.Value("string"),
|
|
|
|
| 75 |
}),
|
| 76 |
# Homepage of the module for documentation
|
| 77 |
homepage="http://module.homepage",
|
|
|
|
| 85 |
# TODO: Download external resources if needed
|
| 86 |
pass
|
| 87 |
|
| 88 |
+
def _compute(self, text, tokenizer):
|
| 89 |
"""Returns the scores"""
|
| 90 |
+
num_tokens = sum(tokenizer(text, return_length=True, return_attention_mask=False, add_special_tokens=False, return_token_type_ids=False)["length"])
|
| 91 |
+
num_bytes = sum([len(s.encode('utf-8')) for s in text])
|
| 92 |
+
|
| 93 |
return {
|
| 94 |
+
"tokens_per_byte": num_tokens / num_bytes,
|
| 95 |
+
}
|