Yaron Koresh committed on
Commit
362ca9f
·
verified ·
1 Parent(s): f62d95d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -568,9 +568,9 @@ def summarize(
568
  prefix = "summarize: "
569
  ret = ""
570
 
571
- for index in range(math.ceil( len(words) / 512 )):
572
 
573
- chunk = " ".join(words[ index*512:(index+1)*512 ])
574
  inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", truncation=False)
575
 
576
  while get_tensor_length(inputs) > max_len:
@@ -716,14 +716,15 @@ def translate(txt,to_lang="en",from_lang=False):
716
  log(f'CALL translate')
717
  if not from_lang:
718
  from_lang = get_language(txt)
 
719
  if(from_lang == to_lang):
720
  log(f'RET translate with txt as {txt}')
721
  return txt
722
  prefix = f"translate {language_codes[from_lang]} to {language_codes[to_lang]}: "
723
  words = txt.split()
724
  ret = ""
725
- for index in range(math.ceil( len(words) / 512 )):
726
- chunk = " ".join(words[index*512:(index+1)*512])
727
  log(f'DBG translate chunk is {chunk}')
728
  inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", truncation=False)
729
  gen = model.generate(inputs,num_beams=3)
 
568
  prefix = "summarize: "
569
  ret = ""
570
 
571
+ for index in range(math.ceil( len(words) / 500 )):
572
 
573
+ chunk = " ".join(words[ index*500:(index+1)*500 ])
574
  inputs = tokenizer.encode( prefix + chunk, return_tensors="pt", truncation=False)
575
 
576
  while get_tensor_length(inputs) > max_len:
 
716
  log(f'CALL translate')
717
  if not from_lang:
718
  from_lang = get_language(txt)
719
+ print(f"translating from {from_lang} to {to_lang}")
720
  if(from_lang == to_lang):
721
  log(f'RET translate with txt as {txt}')
722
  return txt
723
  prefix = f"translate {language_codes[from_lang]} to {language_codes[to_lang]}: "
724
  words = txt.split()
725
  ret = ""
726
+ for index in range(math.ceil( len(words) / 500 )):
727
+ chunk = " ".join(words[index*500:(index+1)*500])
728
  log(f'DBG translate chunk is {chunk}')
729
  inputs = tokenizer.encode(prefix+chunk, return_tensors="pt", truncation=False)
730
  gen = model.generate(inputs,num_beams=3)