Spaces:
Sleeping
Sleeping
Yaron Koresh
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -568,9 +568,9 @@ def summarize(
|
|
568 |
prefix = "summarize: "
|
569 |
ret = ""
|
570 |
|
571 |
-
for index in range(math.ceil( len(words) /
|
572 |
|
573 |
-
chunk = " ".join(words[ index*
|
574 |
inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", truncation=False)
|
575 |
|
576 |
while get_tensor_length(inputs) > max_len:
|
@@ -716,14 +716,15 @@ def translate(txt,to_lang="en",from_lang=False):
|
|
716 |
log(f'CALL translate')
|
717 |
if not from_lang:
|
718 |
from_lang = get_language(txt)
|
|
|
719 |
if(from_lang == to_lang):
|
720 |
log(f'RET translate with txt as {txt}')
|
721 |
return txt
|
722 |
prefix = f"translate {language_codes[from_lang]} to {language_codes[to_lang]}: "
|
723 |
words = txt.split()
|
724 |
ret = ""
|
725 |
-
for index in range(math.ceil( len(words) /
|
726 |
-
chunk = " ".join(words[index*
|
727 |
log(f'DBG translate chunk is {chunk}')
|
728 |
inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", truncation=False)
|
729 |
gen = model.generate(inputs,num_beams=3)
|
|
|
568 |
prefix = "summarize: "
|
569 |
ret = ""
|
570 |
|
571 |
+
for index in range(math.ceil( len(words) / 500 )):
|
572 |
|
573 |
+
chunk = " ".join(words[ index*500:(index+1)*500 ])
|
574 |
inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", truncation=False)
|
575 |
|
576 |
while get_tensor_length(inputs) > max_len:
|
|
|
716 |
log(f'CALL translate')
|
717 |
if not from_lang:
|
718 |
from_lang = get_language(txt)
|
719 |
+
print(f"translating from {from_lang} to {to_lang}")
|
720 |
if(from_lang == to_lang):
|
721 |
log(f'RET translate with txt as {txt}')
|
722 |
return txt
|
723 |
prefix = f"translate {language_codes[from_lang]} to {language_codes[to_lang]}: "
|
724 |
words = txt.split()
|
725 |
ret = ""
|
726 |
+
for index in range(math.ceil( len(words) / 500 )):
|
727 |
+
chunk = " ".join(words[index*500:(index+1)*500])
|
728 |
log(f'DBG translate chunk is {chunk}')
|
729 |
inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", truncation=False)
|
730 |
gen = model.generate(inputs,num_beams=3)
|