Yaron Koresh committed on
Commit
f62d95d
·
verified ·
1 Parent(s): ede445b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -561,7 +561,7 @@ def summarize(
561
 
562
  words = text.split()
563
 
564
- if get_tensor_length(words) < 5:
565
  print("Summarization Error: Text is too short, 5 words minimum!")
566
  return text
567
 
@@ -571,7 +571,7 @@ def summarize(
571
  for index in range(math.ceil( len(words) / 512 )):
572
 
573
  chunk = " ".join(words[ index*512:(index+1)*512 ])
574
- inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", max_length=float('inf'), truncation=False)
575
 
576
  while get_tensor_length(inputs) > max_len:
577
 
@@ -587,7 +587,7 @@ def summarize(
587
  toks = tokenizer.decode(inputs[0], skip_special_tokens=True)
588
  ret = ret + ("" if ret == "" else " ") + toks
589
 
590
- inputs = tokenizer.encode( prefix + ret, return_tensors="pt", max_length=float('inf'), truncation=False)
591
  gen = model.generate(
592
  inputs,
593
  length_penalty=1.0,
@@ -723,9 +723,10 @@ def translate(txt,to_lang="en",from_lang=False):
723
  words = txt.split()
724
  ret = ""
725
  for index in range(math.ceil( len(words) / 512 )):
726
- chunk = " ".join(words[ index*512:(index+1)*512 ])
727
- inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", max_length=float('inf'), truncation=False)
728
- gen = model.generate(inputs)
 
729
  toks = tokenizer.decode(gen[0], skip_special_tokens=True)
730
  ret = ret + ("" if ret == "" else " ") + toks
731
  log(f'RET translate with ret as {ret}')
 
561
 
562
  words = text.split()
563
 
564
+ if len(words) < 5:
565
  print("Summarization Error: Text is too short, 5 words minimum!")
566
  return text
567
 
 
571
  for index in range(math.ceil( len(words) / 512 )):
572
 
573
  chunk = " ".join(words[ index*512:(index+1)*512 ])
574
+ inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", truncation=False)
575
 
576
  while get_tensor_length(inputs) > max_len:
577
 
 
587
  toks = tokenizer.decode(inputs[0], skip_special_tokens=True)
588
  ret = ret + ("" if ret == "" else " ") + toks
589
 
590
+ inputs = tokenizer.encode( prefix + ret, return_tensors="pt", truncation=False)
591
  gen = model.generate(
592
  inputs,
593
  length_penalty=1.0,
 
723
  words = txt.split()
724
  ret = ""
725
  for index in range(math.ceil( len(words) / 512 )):
726
+ chunk = " ".join(words[index*512:(index+1)*512])
727
+ log(f'DBG translate chunk is {chunk}')
728
+ inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", truncation=False)
729
+ gen = model.generate(inputs,num_beams=3)
730
  toks = tokenizer.decode(gen[0], skip_special_tokens=True)
731
  ret = ret + ("" if ret == "" else " ") + toks
732
  log(f'RET translate with ret as {ret}')