|
"""Gradio front end for grAudio: generate audio from text prompts.

Builds a three-column Blocks layout (generation settings, text input,
generated-audio list) and wires the components to the callbacks pulled in
from ``event_handlers``.
"""

import gradio as gr

# Wildcard imports are kept as in the original file: the helpers used below
# (load_existing_audio, get_available_torch_devices, get_available_prompts and
# the event callbacks) come from these two project modules.
from config import *
from event_handlers import *

theme = gr.themes.Ocean(primary_hue=gr.themes.colors.blue).set()

with gr.Blocks(
    theme=theme,
    title="grAudio",
    css=".gradio-container { max-width: 95% !important; }",
) as app:
    # --- Session state -----------------------------------------------------
    # The audio list is seeded from load_existing_audio() when the UI is built.
    initial_audio_list = load_existing_audio()
    audio_list_state = gr.State(value=initial_audio_list)
    # Receives the output of each generate click; its change event feeds the
    # result into audio_list_state (see the wiring below).
    newly_generated_state = gr.State([])
    # -1 is the value used throughout this file for "no row selected".
    selected_index_state = gr.State(-1)

    # --- Layout ------------------------------------------------------------
    gr.Markdown("# Generate Audio from text")
    with gr.Row(equal_height=False):
        # Left column: generation settings and voice prompts.
        with gr.Column(scale=2, min_width=350):
            gr.Markdown("### Generation Configuration")
            with gr.Accordion("Batch size & Temperatures", open=True):
                # This value is passed to generate_batch_audio together with
                # the temperatures, so it is labelled as the batch size (the
                # previous label read "Seed").
                # NOTE(review): minimum=0 still allows a batch size of 0;
                # confirm generate_batch_audio handles that as intended.
                batch_size_number = gr.Number(
                    value=1,
                    label="Batch Size",
                    minimum=0,
                    step=1,
                    scale=1,
                )
                semantic_temp_slider = gr.Slider(
                    0.1, 1.0, value=0.7, step=0.1, label="Semantic Temp"
                )
                coarse_temp_slider = gr.Slider(
                    0.1, 1.0, value=0.7, step=0.1, label="Coarse Temp"
                )
                fine_temp_slider = gr.Slider(
                    0.1, 1.0, value=0.7, step=0.1, label="Fine Temp"
                )
            with gr.Accordion("Model, Devices", open=True):
                model_type_dropdown = gr.Dropdown(
                    choices=["small", "large"], value="small", label="Model Type"
                )
                # The helper returns the selectable devices and the one to
                # preselect in the dropdown.
                available_devices, best_device = get_available_torch_devices()
                device_dropdown = gr.Dropdown(
                    choices=available_devices, value=best_device, label="Device"
                )
            with gr.Accordion("Voice Prompt", open=True):
                prompt_dropdown = gr.Dropdown(
                    choices=get_available_prompts(),
                    label="Select Voice Prompt",
                    info="Optional",
                    multiselect=False,
                    allow_custom_value=False,
                )
                refresh_prompts_btn = gr.Button(
                    "Refresh Prompts", variant="secondary", size="sm"
                )
            with gr.Accordion("Create New Voice Prompt", open=False):
                prompt_audio_upload = gr.File(
                    value=None,
                    file_count="single",
                    label="Upload Audio (.wav, .mp3)",
                    file_types=["audio"],
                    type="filepath",
                )
                create_prompt_btn = gr.Button("Create Prompt", variant="secondary")

        # Middle column: the text to synthesize.
        with gr.Column(scale=4, min_width=600):
            gr.Markdown("### Text Input")
            text_input_block = gr.Textbox(
                lines=30,
                placeholder="""If your text includes multiple long sentences, select a voice prompt to have consistent speech.
Put one sentence on one line.
Do not use long sentence, split them out to multiple sentences with each less than 15 seconds worth of speech""",
                label="Text Prompts",
            )
            generate_btn = gr.Button("Generate", variant="primary")

        # Right column: generated files, playback and deletion.
        with gr.Column(scale=2, min_width=250):
            gr.Markdown("### Generated Audio")
            audio_dataframe = gr.DataFrame(
                headers=["File", "Prompt", "Duration (s)"],
                datatype=["str", "str", "str"],
                interactive=True,
                row_count=(10, "dynamic"),
                col_count=(3, "fixed"),
            )
            selected_audio_player = gr.Audio(
                label="Selected Audio",
                type="filepath",
                interactive=False,
            )
            delete_selected_btn = gr.Button("Delete Selected Audio", variant="stop")

    # --- Event wiring ------------------------------------------------------
    # Re-populate the voice prompt dropdown.
    refresh_prompts_btn.click(
        fn=update_available_prompts, inputs=None, outputs=[prompt_dropdown]
    )

    # Create a prompt from the uploaded file; the callback's result updates
    # the prompt dropdown.
    create_prompt_btn.click(
        fn=create_audio_prompt,
        inputs=[prompt_audio_upload, device_dropdown],
        outputs=[prompt_dropdown],
    )

    # Generation writes only to newly_generated_state; the chained change
    # listeners below propagate the result to the list and the table.
    generate_btn.click(
        fn=generate_batch_audio,
        inputs=[
            text_input_block,
            semantic_temp_slider,
            coarse_temp_slider,
            fine_temp_slider,
            batch_size_number,
            model_type_dropdown,
            device_dropdown,
            prompt_dropdown,
        ],
        outputs=[newly_generated_state],
    )

    # New results + current list -> updated list.
    newly_generated_state.change(
        fn=update_audio_list,
        inputs=[newly_generated_state, audio_list_state],
        outputs=[audio_list_state],
        show_progress="hidden",
    )

    # Whenever the list changes, redraw the table, then clear the player and
    # reset the selection to -1.
    audio_list_state.change(
        fn=format_audio_list_for_dataframe,
        inputs=[audio_list_state],
        outputs=[audio_dataframe],
        show_progress="hidden",
    ).then(
        fn=lambda: (None, -1),
        inputs=None,
        outputs=[selected_audio_player, selected_index_state],
        show_progress="hidden",
        queue=False,
    )

    # Selecting a table row updates the player and the stored index.
    audio_dataframe.select(
        fn=handle_row_selection,
        inputs=[audio_list_state],
        outputs=[
            selected_audio_player,
            selected_index_state,
        ],
        show_progress="hidden",
    )

    # Deleting uses the stored index and the list; the callback returns the
    # new list, the new index and the new player value.
    delete_selected_btn.click(
        fn=handle_delete_selected,
        inputs=[selected_index_state, audio_list_state],
        outputs=[
            audio_list_state,
            selected_index_state,
            selected_audio_player,
        ],
        show_progress="hidden",
    )

    # Fill the table once when the page loads.
    app.load(
        fn=format_audio_list_for_dataframe,
        inputs=[audio_list_state],
        outputs=[audio_dataframe],
    )


if __name__ == "__main__":
    app.launch(debug=True, share=False)
|
|