hynt committed on
Commit
80a8ac7
·
verified ·
1 Parent(s): 2afa6c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -4,9 +4,10 @@ from huggingface_hub import login
4
  import gradio as gr
5
  from cached_path import cached_path
6
  import tempfile
 
7
  from vinorm import TTSnorm
8
  from infer_zipvoice import model, tokenizer, feature_extractor, device
9
- from utils import preprocess_ref_audio_text, save_spectrogram
10
 
11
  # Retrieve token from secrets
12
  hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
@@ -39,21 +40,28 @@ def infer_tts(ref_audio_orig: str, gen_text: str, speed: float = 1.0, request: g
39
  raise gr.Error("Please enter text content with less than 1000 words.")
40
 
41
  try:
42
- ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, "")
43
- final_wave = generate_sentence(
44
- ref_text.lower(),
45
- ref_audio,
46
- post_process(TTSnorm(gen_text)).lower(),
47
- model=model,
48
- vocoder=vocoder,
49
- tokenizer=tokenizer,
50
- feature_extractor=feature_extractor,
51
- device=device,
52
- speed=speed
53
- )
 
 
 
 
 
 
 
54
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
55
  spectrogram_path = tmp_spectrogram.name
56
- save_spectrogram(final_wave, spectrogram_path)
57
 
58
  return (final_sample_rate, final_wave), spectrogram_path
59
  except Exception as e:
 
4
  import gradio as gr
5
  from cached_path import cached_path
6
  import tempfile
7
+ import numpy as np
8
  from vinorm import TTSnorm
9
  from infer_zipvoice import model, tokenizer, feature_extractor, device
10
+ from utils import preprocess_ref_audio_text, save_spectrogram, chunk_text
11
 
12
  # Retrieve token from secrets
13
  hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
40
  raise gr.Error("Please enter text content with less than 1000 words.")
41
 
42
  try:
43
+ gen_texts = chunk_text(gen_text)
44
+ final_wave_total = None
45
+ for i, gen_text in enumerate(gen_texts):
46
+ ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, "")
47
+ final_wave = generate_sentence(
48
+ ref_text.lower(),
49
+ ref_audio,
50
+ post_process(TTSnorm(gen_text)).lower(),
51
+ model=model,
52
+ vocoder=vocoder,
53
+ tokenizer=tokenizer,
54
+ feature_extractor=feature_extractor,
55
+ device=device,
56
+ speed=speed
57
+ )
58
+ if i == 0:
59
+ final_wave_total = final_wave
60
+ else:
61
+ final_wave_total = np.concatenate((final_wave_total, final_wave), axis=0)
62
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
63
  spectrogram_path = tmp_spectrogram.name
64
+ save_spectrogram(final_wave_total, spectrogram_path)
65
 
66
  return (final_sample_rate, final_wave), spectrogram_path
67
  except Exception as e: