ClementRomac HF Staff committed on
Commit
b1191bf
·
1 Parent(s): 442351f

Upload processor

Browse files
Files changed (1) hide show
  1. processor.py +3 -2
processor.py CHANGED
@@ -13,7 +13,7 @@ class GIAProcessor(GitProcessor):
13
  }
14
  for i in range(len(examples["input_ids"])):
15
  _input_size = len(examples["input_ids"][i])
16
- for j in range(max(1, _input_size // max_input_size)):
17
  results["input_ids"].append(examples["input_ids"][i][j*max_input_size:(j + 1) * max_input_size])
18
  results["attention_mask"].append(examples["attention_mask"][i][j * max_input_size:(j + 1) * max_input_size])
19
 
@@ -21,7 +21,8 @@ class GIAProcessor(GitProcessor):
21
 
22
  def __call__(self, examples, max_input_size, return_tensors=None, **kwargs):
23
  if "text" in examples and not "images" in examples:
24
- encoded_text = self.tokenizer(examples["text"], return_tensors=return_tensors)
 
25
  encoding = self._cut_text(encoded_text, max_input_size)
26
  elif "text" in examples and "images" in examples:
27
  encoding = super().__call__(examples["text"], examples["images"], return_tensors, **kwargs)
 
13
  }
14
  for i in range(len(examples["input_ids"])):
15
  _input_size = len(examples["input_ids"][i])
16
+ for j in range(max(1, _input_size // max_input_size)): # skip last if smaller than max_input_size
17
  results["input_ids"].append(examples["input_ids"][i][j*max_input_size:(j + 1) * max_input_size])
18
  results["attention_mask"].append(examples["attention_mask"][i][j * max_input_size:(j + 1) * max_input_size])
19
 
 
21
 
22
  def __call__(self, examples, max_input_size, return_tensors=None, **kwargs):
23
  if "text" in examples and not "images" in examples:
24
+ encoded_text = self.tokenizer(examples["text"], return_tensors=return_tensors, max_length=max_input_size,
25
+ truncation=False, padding="max_length")
26
  encoding = self._cut_text(encoded_text, max_input_size)
27
  elif "text" in examples and "images" in examples:
28
  encoding = super().__call__(examples["text"], examples["images"], return_tensors, **kwargs)