File size: 3,600 Bytes
8a5eb92
 
 
ecc7e05
8a5eb92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc9d6fb
8a5eb92
 
 
 
 
 
 
 
 
 
 
 
515ff46
cc9d6fb
 
 
 
 
515ff46
 
 
 
 
 
 
8a5eb92
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import gradio as gr
from lib.infer import infer_audio
#from google.colab import files
from pydub import AudioSegment
import zipfile
import shutil
import urllib.request
import gdown

# Work from the directory one level above this script's own folder, so the
# relative paths used further down (e.g. "models") resolve against it.
_script_dir = os.path.dirname(os.path.abspath(__file__))
main_dir = os.path.dirname(_script_dir)

os.chdir(main_dir)

def upload_audio(model_name, sound_path, f0_change, f0_method, min_pitch, max_pitch,
                 crepe_hop_length, index_rate, filter_radius, rms_mix_rate,
                 protect, split_infer, min_silence, silence_threshold,
                 seek_step, keep_silence, formant_shift, quefrency, timbre,
                 f0_autotune, output_format):
    """Run voice-conversion inference on one audio input and load the result.

    Every argument is forwarded, unchanged and in the same order, to
    ``lib.infer.infer_audio``; the precise meaning and accepted values of
    each option are defined by that function.

    Args:
        model_name: Voice model to run inference with.
        sound_path: Input audio to convert. Must not be empty or ``None``.
        f0_change, f0_method, min_pitch, max_pitch, crepe_hop_length,
        index_rate, filter_radius, rms_mix_rate, protect, split_infer,
        min_silence, silence_threshold, seek_step, keep_silence,
        formant_shift, quefrency, timbre, f0_autotune, output_format:
            Inference options passed straight through to ``infer_audio``.

    Returns:
        A ``pydub.AudioSegment`` loaded from whatever ``infer_audio``
        returned (presumably the path of the converted file -- confirm
        against ``lib.infer``).

    Raises:
        ValueError: If ``sound_path`` is empty or ``None``.
    """
    if not sound_path:
        # The previous fallback called `files.upload()`, but `files` is not
        # imported in this module (the Colab import is commented out), so it
        # could only fail with NameError. Fail with a clear message instead.
        raise ValueError(
            "No input audio was provided. Please supply one audio file to convert."
        )

    inferred_audio = infer_audio(
        model_name,
        sound_path,
        f0_change,
        f0_method,
        min_pitch,
        max_pitch,
        crepe_hop_length,
        index_rate,
        filter_radius,
        rms_mix_rate,
        protect,
        split_infer,
        min_silence,
        silence_threshold,
        seek_step,
        keep_silence,
        formant_shift,
        quefrency,
        timbre,
        f0_autotune,
        output_format,
    )

    return AudioSegment.from_file(inferred_audio)

def download_model(url, dir_name):
    """Download a zipped voice model and unpack it into ``models/<dir_name>``.

    The archive is fetched according to the URL's host:

    * ``pixeldrain.com`` links are rewritten to the pixeldrain file API and
      downloaded with ``urllib``.
    * ``drive.google.com`` links are downloaded with ``gdown``.
    * Any other URL is downloaded directly with ``urllib``.

    The archive is stored temporarily as ``<dir_name>.zip`` in the current
    working directory and is always removed afterwards, whether or not the
    extraction succeeded. If extraction fails, the partially created model
    folder is removed as well so the same name can be retried.

    Args:
        url: Location of the zip archive to download.
        dir_name: Plain folder name (no path separators) to create under
            ``models``.

    Returns:
        A human-readable status message: success, "already exists", or a
        description of what is wrong with the inputs.

    Raises:
        Exception: Download and archive errors (for example
            ``urllib.error.URLError`` or ``zipfile.BadZipFile``) propagate to
            the caller after cleanup.
    """
    url = (url or "").strip()
    dir_name = (dir_name or "").strip()

    if not url or not dir_name:
        return 'Please provide both a model download URL and a model name.'

    # dir_name becomes a folder below models/, so refuse anything that could
    # point outside of it.
    if dir_name in (".", "..") or os.path.basename(dir_name) != dir_name:
        return f'Invalid model name {dir_name}! Use a plain folder name without path separators.'

    models_dir = "models"
    extraction_folder = os.path.join(models_dir, dir_name)

    if os.path.exists(extraction_folder):
        return f'Voice model directory {dir_name} already exists! Choose a different name.'

    # Name the temporary archive after the (validated) model name rather than
    # the URL tail, which may be empty or carry a query string.
    zip_name = dir_name + ".zip"

    try:
        if 'pixeldrain.com' in url:
            file_id = url.rstrip('/').split('/')[-1]
            url = f'https://pixeldrain.com/api/file/{file_id}'
            # The rewritten API URL still has to be fetched; without this the
            # archive never exists and opening it fails.
            urllib.request.urlretrieve(url, zip_name)
        elif 'drive.google.com' in url:
            gdown.download(url, output=zip_name, use_cookies=True, quiet=True, fuzzy=True)
        else:
            urllib.request.urlretrieve(url, zip_name)

        try:
            with zipfile.ZipFile(zip_name, 'r') as zip_ref:
                zip_ref.extractall(extraction_folder)
        except Exception:
            # Do not leave a half-extracted folder behind: it would make every
            # retry report that the model already exists.
            shutil.rmtree(extraction_folder, ignore_errors=True)
            raise
    finally:
        if os.path.exists(zip_name):
            os.remove(zip_name)

    return f'{dir_name} model successfully downloaded!'

# Values for the inference options that the UI does not expose yet. They are
# passed to upload_audio by keyword so each one reaches the right parameter.
# NOTE(review): these are assumed to match typical RVC defaults -- confirm the
# values and their types against lib.infer.infer_audio.
_HIDDEN_INFERENCE_OPTIONS = {
    "min_pitch": 50,
    "max_pitch": 1100,
    "crepe_hop_length": 128,
    "index_rate": 0.75,
    "filter_radius": 3,
    "rms_mix_rate": 0.25,
    "protect": 0.33,
    "split_infer": False,
    "min_silence": 500,
    "silence_threshold": -50,
    "seek_step": 1,
    "keep_silence": 100,
    "formant_shift": False,
    "quefrency": 0,
    "timbre": 1,
    "f0_autotune": False,
}


def _run_inference(model_name, sound_path, f0_change, f0_method, output_format):
    """Adapt the five UI inputs to ``upload_audio`` and return a file path.

    ``upload_audio`` takes 21 parameters and returns a pydub ``AudioSegment``.
    The UI only collects five values, and a ``gr.Audio`` output needs a file
    path rather than an ``AudioSegment``. This adapter fills the remaining
    parameters from ``_HIDDEN_INFERENCE_OPTIONS`` and writes the result to a
    temporary file in the requested format.
    """
    import tempfile

    segment = upload_audio(
        model_name,
        sound_path,
        f0_change,
        f0_method,
        output_format=output_format,
        **_HIDDEN_INFERENCE_OPTIONS,
    )

    # delete=False: the file must outlive this function so Gradio can read it.
    with tempfile.NamedTemporaryFile(suffix=f".{output_format}", delete=False) as handle:
        out_path = handle.name

    # export() hands back an open file object; close it so no handle leaks.
    segment.export(out_path, format=output_format).close()
    return out_path


with gr.Blocks() as app:
    gr.Markdown("## Inference")

    with gr.Row():
        model_name = gr.Textbox(label="Model Name")
        # type="filepath" hands the handler a path on disk instead of the
        # default (sample_rate, samples) tuple.
        sound_path = gr.Audio(label="Audio Path", type="filepath")

    with gr.Row():
        f0_change = gr.Slider(minimum=-12, maximum=12, label="F0 Change (semitones)", value=0)
        f0_method = gr.Dropdown(choices=["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe", "fcpe_legacy", "hybrid[mangio-crepe+rmvpe]", "hybrid[mangio-crepe+fcpe]", "hybrid[rmvpe+fcpe]", "hybrid[mangio-crepe+rmvpe+fcpe]"], label="F0 Method", value="fcpe")

    # Add more settings as required; any option promoted to a UI control
    # should be removed from _HIDDEN_INFERENCE_OPTIONS and passed explicitly.
    output_format = gr.Dropdown(choices=["wav", "flac", "mp3"], label="Output Format", value="wav")

    output_audio = gr.Audio(label="Inferred Audio Output")

    with gr.Tab("Download Models"):
        url = gr.Textbox(label="Model Download URL")
        dir_name = gr.Textbox(label="Desired Model Name")
        download_button = gr.Button("Download Model")
        download_output = gr.Textbox(label="output")

    submit_button = gr.Button("Infer")
    download_button.click(download_model, inputs=[url, dir_name], outputs=download_output)
    submit_button.click(
        _run_inference,
        inputs=[model_name, sound_path, f0_change, f0_method, output_format],
        outputs=output_audio,
    )


app.launch()