Yaron Koresh committed on
Commit
362ca9f
·
verified ·
1 Parent(s): f62d95d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -568,9 +568,9 @@ def summarize(
568
  prefix = "summarize: "
569
  ret = ""
570
 
571
- for index in range(math.ceil( len(words) / 512 )):
572
 
573
- chunk = " ".join(words[ index*512:(index+1)*512 ])
574
  inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", truncation=False)
575
 
576
  while get_tensor_length(inputs) > max_len:
@@ -716,14 +716,15 @@ def translate(txt,to_lang="en",from_lang=False):
716
  log(f'CALL translate')
717
  if not from_lang:
718
  from_lang = get_language(txt)
 
719
  if(from_lang == to_lang):
720
  log(f'RET translate with txt as {txt}')
721
  return txt
722
  prefix = f"translate {language_codes[from_lang]} to {language_codes[to_lang]}: "
723
  words = txt.split()
724
  ret = ""
725
- for index in range(math.ceil( len(words) / 512 )):
726
- chunk = " ".join(words[index*512:(index+1)*512])
727
  log(f'DBG translate chunk is {chunk}')
728
  inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", truncation=False)
729
  gen = model.generate(inputs,num_beams=3)
 
568
  prefix = "summarize: "
569
  ret = ""
570
 
571
+ for index in range(math.ceil( len(words) / 500 )):
572
 
573
+ chunk = " ".join(words[ index*500:(index+1)*500 ])
574
  inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", truncation=False)
575
 
576
  while get_tensor_length(inputs) > max_len:
 
716
  log(f'CALL translate')
717
  if not from_lang:
718
  from_lang = get_language(txt)
719
+ print(f"translating from {from_lang} to {to_lang}")
720
  if(from_lang == to_lang):
721
  log(f'RET translate with txt as {txt}')
722
  return txt
723
  prefix = f"translate {language_codes[from_lang]} to {language_codes[to_lang]}: "
724
  words = txt.split()
725
  ret = ""
726
+ for index in range(math.ceil( len(words) / 500 )):
727
+ chunk = " ".join(words[index*500:(index+1)*500])
728
  log(f'DBG translate chunk is {chunk}')
729
  inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", truncation=False)
730
  gen = model.generate(inputs,num_beams=3)