Spaces:

dangtr0408
/

StyleTTS2-lite-space

Running on Zero

App Files Community

dangtr0408 committed on Apr 25

Commit

156f2a1

1 Parent(s): f522903

minor fixes

Browse files

Files changed (1) hide show

app.py +10 -7

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import soundfile as sf
 import torch
 import traceback
 import random
 import spaces
 import sys
@@ -98,8 +99,8 @@ def main(text_prompt, reference_paths, speed, denoise, avg_style, stabilize):
             styles  = model.get_styles(speaker, denoise, avg_style)
             r       = model.generate(phonemes, styles, stabilize, 18)
-        sf.write("output_demo.wav", r, samplerate=24000)
-        return "output_demo.wav", "Audio generated successfully!"
     except Exception as e:
         error_message = traceback.format_exc()
@@ -127,9 +128,9 @@ with gr.Blocks() as demo:
     with gr.Row(equal_height=True):
         with gr.Column():
             speed = gr.Slider(0.0, 2.0, step=0.1, value=1.0, label="Speed")
-            denoise = gr.Slider(0.0, 1.0, step=0.1, value=0.0, label="Denoise Strength")
-            avg_style = gr.Checkbox(label="Use Average Styles", value=False)
-            stabilize = gr.Checkbox(label="Stabilize Speaking Speed", value=False)
             text_prompt = gr.Textbox(label="Text Prompt", placeholder="Enter your text here...", lines=10, max_lines=10)
@@ -138,13 +139,15 @@ with gr.Blocks() as demo:
         with gr.Column():
             reference_audios = gr.Audio(label="Reference Audios", type='filepath')
-            synthesized_audio = gr.Audio(label="Generate Audio", type="filepath")
             example_voices = gr.Dropdown(
                 label="Example voices",
                 choices=voice_choices,
-                value=voice_choices[0][0],
                 interactive=True,
             )
             with gr.Row(equal_height=False):

 import torch
 import traceback
 import random
+import numpy as np
 import spaces
 import sys
             styles  = model.get_styles(speaker, denoise, avg_style)
             r       = model.generate(phonemes, styles, stabilize, 18)
+        r = r / np.max(np.abs(r)) #Normalize
+        return (24000, r), "Audio generated successfully!"
     except Exception as e:
         error_message = traceback.format_exc()
     with gr.Row(equal_height=True):
         with gr.Column():
             speed = gr.Slider(0.0, 2.0, step=0.1, value=1.0, label="Speed")
+            denoise = gr.Slider(0.0, 1.0, step=0.1, value=0.2, label="Denoise Strength")
+            avg_style = gr.Checkbox(label="Use Average Styles", value=True)
+            stabilize = gr.Checkbox(label="Stabilize Speaking Speed", value=True)
             text_prompt = gr.Textbox(label="Text Prompt", placeholder="Enter your text here...", lines=10, max_lines=10)
         with gr.Column():
             reference_audios = gr.Audio(label="Reference Audios", type='filepath')
+            synthesized_audio = gr.Audio(label="Generate Audio", type='numpy')
             example_voices = gr.Dropdown(
                 label="Example voices",
                 choices=voice_choices,
+                value=None,
                 interactive=True,
+                allow_custom_value=False,
+                filterable=False
             )
             with gr.Row(equal_height=False):