bluenevus committed on
Commit
7d92703
·
verified ·
1 Parent(s): ac81409

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -42,7 +42,18 @@ def text_to_speech(text, speaker_id):
42
  with torch.no_grad():
43
  sampled = e2tts.sample(mel[:, :5], text=[text])
44
 
45
- return sampled.cpu().numpy().squeeze()
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def create_podcast(api_key, content, duration, voice1, voice2):
48
  script = generate_podcast_script(api_key, content, duration)
@@ -57,18 +68,22 @@ def render_podcast(api_key, script, voice1, voice2):
57
  audio_segments = []
58
 
59
  for line in lines:
60
- if line.startswith("Host 1:"):
61
- audio = text_to_speech(line[7:], speaker_id=0)
62
- audio_segments.append(audio)
63
- elif line.startswith("Host 2:"):
64
- audio = text_to_speech(line[7:], speaker_id=1)
65
- audio_segments.append(audio)
66
 
67
  if not audio_segments:
68
- return (22050, np.zeros(22050)) # Return silence if no audio was generated
 
69
 
70
  # Concatenate audio segments
71
  podcast_audio = np.concatenate(audio_segments)
 
 
 
 
 
72
  return (22050, podcast_audio) # Assuming 22050 Hz sample rate
73
 
74
  # Gradio Interface
 
42
  with torch.no_grad():
43
  sampled = e2tts.sample(mel[:, :5], text=[text])
44
 
45
+ audio = sampled.cpu().numpy().squeeze()
46
+
47
+ # Check if audio contains any non-zero values
48
+ if np.all(audio == 0):
49
+ print(f"Warning: Generated audio for '{text}' is all zeros.")
50
+ elif np.any(np.isnan(audio)) or np.any(np.isinf(audio)):
51
+ print(f"Warning: Generated audio for '{text}' contains NaN or Inf values.")
52
+
53
+ # Normalize audio to [-1, 1] range
54
+ audio = np.clip(audio, -1, 1)
55
+
56
+ return audio
57
 
58
  def create_podcast(api_key, content, duration, voice1, voice2):
59
  script = generate_podcast_script(api_key, content, duration)
 
68
  audio_segments = []
69
 
70
  for line in lines:
71
+ if line.startswith("Host 1:") or line.startswith("Host 2:"):
72
+ audio = text_to_speech(line[7:], speaker_id=0 if line.startswith("Host 1:") else 1)
73
+ if not np.all(audio == 0) and not np.any(np.isnan(audio)) and not np.any(np.isinf(audio)):
74
+ audio_segments.append(audio)
 
 
75
 
76
  if not audio_segments:
77
+ print("Warning: No valid audio segments were generated.")
78
+ return (22050, np.zeros(22050)) # Return silence if no valid audio was generated
79
 
80
  # Concatenate audio segments
81
  podcast_audio = np.concatenate(audio_segments)
82
+
83
+ # Ensure audio is in the correct range for int16
84
+ podcast_audio = np.clip(podcast_audio, -1, 1) * 32767
85
+ podcast_audio = podcast_audio.astype(np.int16)
86
+
87
  return (22050, podcast_audio) # Assuming 22050 Hz sample rate
88
 
89
  # Gradio Interface