Yaron Koresh committed on
Commit
a3ed68b
·
verified ·
1 Parent(s): 314085e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -558,7 +558,7 @@ def summarize(
558
  text, max_len=20, min_len=10
559
  ):
560
  log(f'CALL summarize')
561
- inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=float('inf'), truncation=False).input_ids
562
  if get_tensor_length(inputs) < 3:
563
  print("Summarization Error: Text is too short, 3 words minimum!")
564
  return text
@@ -640,15 +640,14 @@ def translate(txt,to_lang="en",from_lang=False):
640
  if(from_lang == to_lang):
641
  log(f'RET translate with txt as {txt}')
642
  return txt
643
- inputs = tokenizer.encode(f"translate {from_lang} to {to_lang}: " + txt, return_tensors="pt", max_length=float('inf'), truncation=False).input_ids
644
  chunks_length = math.ceil(get_tensor_length(inputs) / 512)
645
  ret = ""
646
  for index in range(chunks_length):
647
- ret = ret + ("" if ret == "" else " ") + tokenizer.decode(
648
- model.generate(
649
- torch.tensor([list(inputs[0][ index*512:index*512+512 ])])
650
- )[0], skip_special_tokens=True
651
- )
652
  log(f'RET translate with ret as {ret}')
653
  return ret
654
 
 
558
  text, max_len=20, min_len=10
559
  ):
560
  log(f'CALL summarize')
561
+ inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=float('inf'), truncation=False)
562
  if get_tensor_length(inputs) < 3:
563
  print("Summarization Error: Text is too short, 3 words minimum!")
564
  return text
 
640
  if(from_lang == to_lang):
641
  log(f'RET translate with txt as {txt}')
642
  return txt
643
+ inputs = tokenizer.encode(f"translate {from_lang} to {to_lang}: " + txt, return_tensors="pt", max_length=float('inf'), truncation=False)
644
  chunks_length = math.ceil(get_tensor_length(inputs) / 512)
645
  ret = ""
646
  for index in range(chunks_length):
647
+ chunk = torch.tensor([list(inputs[0][ index*512:index*512+512 ])])
648
+ gen = model.generate(chunk)
649
+ toks = tokenizer.decode(gen[0], skip_special_tokens=True)
650
+ ret = ret + ("" if ret == "" else " ") + toks
 
651
  log(f'RET translate with ret as {ret}')
652
  return ret
653