Commit
·
b1191bf
1
Parent(s):
442351f
Upload processor
Browse files — processor.py (+3 -2)
processor.py
CHANGED
@@ -13,7 +13,7 @@ class GIAProcessor(GitProcessor):
|
|
13 |
}
|
14 |
for i in range(len(examples["input_ids"])):
|
15 |
_input_size = len(examples["input_ids"][i])
|
16 |
-
for j in range(max(1, _input_size // max_input_size)):
|
17 |
results["input_ids"].append(examples["input_ids"][i][j*max_input_size:(j + 1) * max_input_size])
|
18 |
results["attention_mask"].append(examples["attention_mask"][i][j * max_input_size:(j + 1) * max_input_size])
|
19 |
|
@@ -21,7 +21,8 @@ class GIAProcessor(GitProcessor):
|
|
21 |
|
22 |
def __call__(self, examples, max_input_size, return_tensors=None, **kwargs):
|
23 |
if "text" in examples and not "images" in examples:
|
24 |
-
encoded_text = self.tokenizer(examples["text"], return_tensors=return_tensors
|
|
|
25 |
encoding = self._cut_text(encoded_text, max_input_size)
|
26 |
elif "text" in examples and "images" in examples:
|
27 |
encoding = super().__call__(examples["text"], examples["images"], return_tensors, **kwargs)
|
|
|
13 |
}
|
14 |
for i in range(len(examples["input_ids"])):
|
15 |
_input_size = len(examples["input_ids"][i])
|
16 |
+
for j in range(max(1, _input_size // max_input_size)): # skip last if smaller than max_input_size
|
17 |
results["input_ids"].append(examples["input_ids"][i][j*max_input_size:(j + 1) * max_input_size])
|
18 |
results["attention_mask"].append(examples["attention_mask"][i][j * max_input_size:(j + 1) * max_input_size])
|
19 |
|
|
|
21 |
|
22 |
def __call__(self, examples, max_input_size, return_tensors=None, **kwargs):
|
23 |
if "text" in examples and not "images" in examples:
|
24 |
+
encoded_text = self.tokenizer(examples["text"], return_tensors=return_tensors, max_length=max_input_size,
|
25 |
+
truncation=False, padding="max_length")
|
26 |
encoding = self._cut_text(encoded_text, max_input_size)
|
27 |
elif "text" in examples and "images" in examples:
|
28 |
encoding = super().__call__(examples["text"], examples["images"], return_tensors, **kwargs)
|