Spaces:
Sleeping
Sleeping
Yaron Koresh
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -559,24 +559,21 @@ def get_tensor_length(tensor):
|
|
559 |
ret = ret * num
|
560 |
return ret
|
561 |
|
562 |
-
def _summarize(text
|
563 |
log(f'CALL _summarize')
|
564 |
prefix = "summarize: "
|
565 |
toks = tokenizer.encode( prefix + text, return_tensors="pt", truncation=False)
|
566 |
gen = model.generate(
|
567 |
toks,
|
568 |
-
length_penalty=
|
569 |
-
num_beams=
|
570 |
early_stopping=True,
|
571 |
-
max_length=
|
572 |
)
|
573 |
ret = tokenizer.decode(gen[0], skip_special_tokens=True)
|
574 |
log(f'RET _summarize with ret as {ret}')
|
575 |
return ret
|
576 |
|
577 |
-
def _summ_step(length):
|
578 |
-
return length // 1.5
|
579 |
-
|
580 |
def summarize(text, max_words=20):
|
581 |
log(f'CALL summarize')
|
582 |
|
@@ -603,9 +600,7 @@ def summarize(text, max_words=20):
|
|
603 |
words_length = len(text.split())
|
604 |
|
605 |
while words_length > max_words:
|
606 |
-
|
607 |
-
mx = words_length - step
|
608 |
-
text = _summarize(text, mx)
|
609 |
words_length = len(text.split())
|
610 |
|
611 |
log(f'RET summarize with text as {text}')
|
@@ -1187,7 +1182,7 @@ class GoogleTranslator(BaseTranslator):
|
|
1187 |
@param text: desired text to translate
|
1188 |
@return: str: translated text
|
1189 |
"""
|
1190 |
-
if is_input_valid(text, max_chars=
|
1191 |
text = text.strip()
|
1192 |
if self._same_source_target() or is_empty(text):
|
1193 |
return text
|
@@ -1274,7 +1269,7 @@ def translate(txt,to_lang="en",from_lang="auto"):
|
|
1274 |
translation = translator.translate(txt)
|
1275 |
translation = translation.strip()
|
1276 |
log(f'RET translate with translation as {translation}')
|
1277 |
-
return translation
|
1278 |
|
1279 |
@spaces.GPU(duration=120)
|
1280 |
def handle_generation(artist,song,genre,lyrics):
|
@@ -1285,11 +1280,14 @@ def handle_generation(artist,song,genre,lyrics):
|
|
1285 |
pos_song = re.sub(r"([ \t]){1,}", " ", song).lower().strip()
|
1286 |
pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
|
1287 |
|
|
|
1288 |
lyrics_lines = re.split(r"([\n]){1,}", lyrics)
|
1289 |
|
1290 |
for line_index in range(len(lyrics_lines)):
|
1291 |
if re.sub(f'[{punctuation}]$', '', lyrics_lines[line_index]) == lyrics_lines[line_index]:
|
1292 |
-
lyrics_lines[line_index] = lyrics_lines[line_index] + "."
|
|
|
|
|
1293 |
lyrics = " ".join(lyrics_lines)
|
1294 |
|
1295 |
pos_genre = re.sub(f'[{punctuation}]', '', re.sub(r"([ \t]){1,}", " ", genre)).lower().strip()
|
|
|
559 |
ret = ret * num
|
560 |
return ret
|
561 |
|
562 |
+
def _summarize(text):
|
563 |
log(f'CALL _summarize')
|
564 |
prefix = "summarize: "
|
565 |
toks = tokenizer.encode( prefix + text, return_tensors="pt", truncation=False)
|
566 |
gen = model.generate(
|
567 |
toks,
|
568 |
+
length_penalty=3.0,
|
569 |
+
num_beams=math.ceil(len(text.split()) / 6),
|
570 |
early_stopping=True,
|
571 |
+
max_length=math.ceil(len(text.split()) / 4)
|
572 |
)
|
573 |
ret = tokenizer.decode(gen[0], skip_special_tokens=True)
|
574 |
log(f'RET _summarize with ret as {ret}')
|
575 |
return ret
|
576 |
|
|
|
|
|
|
|
577 |
def summarize(text, max_words=20):
|
578 |
log(f'CALL summarize')
|
579 |
|
|
|
600 |
words_length = len(text.split())
|
601 |
|
602 |
while words_length > max_words:
|
603 |
+
text = _summarize(text)
|
|
|
|
|
604 |
words_length = len(text.split())
|
605 |
|
606 |
log(f'RET summarize with text as {text}')
|
|
|
1182 |
@param text: desired text to translate
|
1183 |
@return: str: translated text
|
1184 |
"""
|
1185 |
+
if is_input_valid(text, max_chars=1000):
|
1186 |
text = text.strip()
|
1187 |
if self._same_source_target() or is_empty(text):
|
1188 |
return text
|
|
|
1269 |
translation = translator.translate(txt)
|
1270 |
translation = translation.strip()
|
1271 |
log(f'RET translate with translation as {translation}')
|
1272 |
+
return translation.lower()
|
1273 |
|
1274 |
@spaces.GPU(duration=120)
|
1275 |
def handle_generation(artist,song,genre,lyrics):
|
|
|
1280 |
pos_song = re.sub(r"([ \t]){1,}", " ", song).lower().strip()
|
1281 |
pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
|
1282 |
|
1283 |
+
lyrics = re.sub(r"(, | ,)",". ",lyrics)
|
1284 |
lyrics_lines = re.split(r"([\n]){1,}", lyrics)
|
1285 |
|
1286 |
for line_index in range(len(lyrics_lines)):
|
1287 |
if re.sub(f'[{punctuation}]$', '', lyrics_lines[line_index]) == lyrics_lines[line_index]:
|
1288 |
+
lyrics_lines[line_index] = lyrics_lines[line_index].strip() + "."
|
1289 |
+
else:
|
1290 |
+
lyrics_lines[line_index] = lyrics_lines[line_index].strip()
|
1291 |
lyrics = " ".join(lyrics_lines)
|
1292 |
|
1293 |
pos_genre = re.sub(f'[{punctuation}]', '', re.sub(r"([ \t]){1,}", " ", genre)).lower().strip()
|