Spaces:
Sleeping
Sleeping
Yaron Koresh
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -561,7 +561,7 @@ def summarize(
|
|
561 |
|
562 |
words = text.split()
|
563 |
|
564 |
-
if
|
565 |
print("Summarization Error: Text is too short, 5 words minimum!")
|
566 |
return text
|
567 |
|
@@ -571,7 +571,7 @@ def summarize(
|
|
571 |
for index in range(math.ceil( len(words) / 512 )):
|
572 |
|
573 |
chunk = " ".join(words[ index*512:(index+1)*512 ])
|
574 |
-
inputs = tokenizer.encode( prefix + chunk, return_tensors="pt",
|
575 |
|
576 |
while get_tensor_length(inputs) > max_len:
|
577 |
|
@@ -587,7 +587,7 @@ def summarize(
|
|
587 |
toks = tokenizer.decode(inputs[0], skip_special_tokens=True)
|
588 |
ret = ret + ("" if ret == "" else " ") + toks
|
589 |
|
590 |
-
inputs = tokenizer.encode( prefix + ret, return_tensors="pt",
|
591 |
gen = model.generate(
|
592 |
inputs,
|
593 |
length_penalty=1.0,
|
@@ -723,9 +723,10 @@ def translate(txt,to_lang="en",from_lang=False):
|
|
723 |
words = txt.split()
|
724 |
ret = ""
|
725 |
for index in range(math.ceil( len(words) / 512 )):
|
726 |
-
chunk = " ".join(words[
|
727 |
-
|
728 |
-
|
|
|
729 |
toks = tokenizer.decode(gen[0], skip_special_tokens=True)
|
730 |
ret = ret + ("" if ret == "" else " ") + toks
|
731 |
log(f'RET translate with ret as {ret}')
|
|
|
561 |
|
562 |
words = text.split()
|
563 |
|
564 |
+
if len(words) < 5:
|
565 |
print("Summarization Error: Text is too short, 5 words minimum!")
|
566 |
return text
|
567 |
|
|
|
571 |
for index in range(math.ceil( len(words) / 512 )):
|
572 |
|
573 |
chunk = " ".join(words[ index*512:(index+1)*512 ])
|
574 |
+
inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", truncation=False)
|
575 |
|
576 |
while get_tensor_length(inputs) > max_len:
|
577 |
|
|
|
587 |
toks = tokenizer.decode(inputs[0], skip_special_tokens=True)
|
588 |
ret = ret + ("" if ret == "" else " ") + toks
|
589 |
|
590 |
+
inputs = tokenizer.encode( prefix + ret, return_tensors="pt", truncation=False)
|
591 |
gen = model.generate(
|
592 |
inputs,
|
593 |
length_penalty=1.0,
|
|
|
723 |
words = txt.split()
|
724 |
ret = ""
|
725 |
for index in range(math.ceil( len(words) / 512 )):
|
726 |
+
chunk = " ".join(words[index*512:(index+1)*512])
|
727 |
+
log(f'DBG translate chunk is {chunk}')
|
728 |
+
inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", truncation=False)
|
729 |
+
gen = model.generate(inputs,num_beams=3)
|
730 |
toks = tokenizer.decode(gen[0], skip_special_tokens=True)
|
731 |
ret = ret + ("" if ret == "" else " ") + toks
|
732 |
log(f'RET translate with ret as {ret}')
|