Yaron Koresh committed on
Commit
78cc42c
·
verified ·
1 Parent(s): 290136d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -13
app.py CHANGED
@@ -559,24 +559,21 @@ def get_tensor_length(tensor):
559
  ret = ret * num
560
  return ret
561
 
562
- def _summarize(text, max_words=20):
563
  log(f'CALL _summarize')
564
  prefix = "summarize: "
565
  toks = tokenizer.encode( prefix + text, return_tensors="pt", truncation=False)
566
  gen = model.generate(
567
  toks,
568
- length_penalty=2.0,
569
- num_beams=4,
570
  early_stopping=True,
571
- max_length=max_words
572
  )
573
  ret = tokenizer.decode(gen[0], skip_special_tokens=True)
574
  log(f'RET _summarize with ret as {ret}')
575
  return ret
576
 
577
- def _summ_step(length):
578
- return length // 1.5
579
-
580
  def summarize(text, max_words=20):
581
  log(f'CALL summarize')
582
 
@@ -603,9 +600,7 @@ def summarize(text, max_words=20):
603
  words_length = len(text.split())
604
 
605
  while words_length > max_words:
606
- step = _summ_step(words_length)
607
- mx = words_length - step
608
- text = _summarize(text, mx)
609
  words_length = len(text.split())
610
 
611
  log(f'RET summarize with text as {text}')
@@ -1187,7 +1182,7 @@ class GoogleTranslator(BaseTranslator):
1187
  @param text: desired text to translate
1188
  @return: str: translated text
1189
  """
1190
- if is_input_valid(text, max_chars=5000):
1191
  text = text.strip()
1192
  if self._same_source_target() or is_empty(text):
1193
  return text
@@ -1274,7 +1269,7 @@ def translate(txt,to_lang="en",from_lang="auto"):
1274
  translation = translator.translate(txt)
1275
  translation = translation.strip()
1276
  log(f'RET translate with translation as {translation}')
1277
- return translation
1278
 
1279
  @spaces.GPU(duration=120)
1280
  def handle_generation(artist,song,genre,lyrics):
@@ -1285,11 +1280,14 @@ def handle_generation(artist,song,genre,lyrics):
1285
  pos_song = re.sub(r"([ \t]){1,}", " ", song).lower().strip()
1286
  pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
1287
 
 
1288
  lyrics_lines = re.split(r"([\n]){1,}", lyrics)
1289
 
1290
  for line_index in range(len(lyrics_lines)):
1291
  if re.sub(f'[{punctuation}]$', '', lyrics_lines[line_index]) == lyrics_lines[line_index]:
1292
- lyrics_lines[line_index] = lyrics_lines[line_index] + "."
 
 
1293
  lyrics = " ".join(lyrics_lines)
1294
 
1295
  pos_genre = re.sub(f'[{punctuation}]', '', re.sub(r"([ \t]){1,}", " ", genre)).lower().strip()
 
559
  ret = ret * num
560
  return ret
561
 
562
+ def _summarize(text):
563
  log(f'CALL _summarize')
564
  prefix = "summarize: "
565
  toks = tokenizer.encode( prefix + text, return_tensors="pt", truncation=False)
566
  gen = model.generate(
567
  toks,
568
+ length_penalty=3.0,
569
+ num_beams=math.ceil(len(text.split()) / 6),
570
  early_stopping=True,
571
+ max_length=math.ceil(len(text.split()) / 4)
572
  )
573
  ret = tokenizer.decode(gen[0], skip_special_tokens=True)
574
  log(f'RET _summarize with ret as {ret}')
575
  return ret
576
 
 
 
 
577
  def summarize(text, max_words=20):
578
  log(f'CALL summarize')
579
 
 
600
  words_length = len(text.split())
601
 
602
  while words_length > max_words:
603
+ text = _summarize(text)
 
 
604
  words_length = len(text.split())
605
 
606
  log(f'RET summarize with text as {text}')
 
1182
  @param text: desired text to translate
1183
  @return: str: translated text
1184
  """
1185
+ if is_input_valid(text, max_chars=1000):
1186
  text = text.strip()
1187
  if self._same_source_target() or is_empty(text):
1188
  return text
 
1269
  translation = translator.translate(txt)
1270
  translation = translation.strip()
1271
  log(f'RET translate with translation as {translation}')
1272
+ return translation.lower()
1273
 
1274
  @spaces.GPU(duration=120)
1275
  def handle_generation(artist,song,genre,lyrics):
 
1280
  pos_song = re.sub(r"([ \t]){1,}", " ", song).lower().strip()
1281
  pos_song = ' '.join(word[0].upper() + word[1:] for word in pos_song.split())
1282
 
1283
+ lyrics = re.sub(r"(, | ,)",". ",lyrics)
1284
  lyrics_lines = re.split(r"([\n]){1,}", lyrics)
1285
 
1286
  for line_index in range(len(lyrics_lines)):
1287
  if re.sub(f'[{punctuation}]$', '', lyrics_lines[line_index]) == lyrics_lines[line_index]:
1288
+ lyrics_lines[line_index] = lyrics_lines[line_index].strip() + "."
1289
+ else:
1290
+ lyrics_lines[line_index] = lyrics_lines[line_index].strip()
1291
  lyrics = " ".join(lyrics_lines)
1292
 
1293
  pos_genre = re.sub(f'[{punctuation}]', '', re.sub(r"([ \t]){1,}", " ", genre)).lower().strip()