# Source: Hugging Face Space file view (Space status: "Build error"; file size: 8,561 bytes).
# The page's per-line commit hashes and line-number gutter were extraction residue and are omitted.
import gradio as gr
import os
from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
import platform
from models.model import *
from tts.conversion import COQUI_LANGUAGES
import pytube
import os
import traceback
from pydub import AudioSegment
# from audio_enhance.functions import audio_enhance
def convert_yt_to_wav(url):
    """Download the audio track of a YouTube video and convert it to WAV.

    The first audio-only stream is downloaded with pytube into ``yt_videos``,
    converted with pydub into ``audios/<downloaded file name>.wav``, and the
    temporary download is removed afterwards, whether or not the conversion
    succeeded.

    Args:
        url: Link to the YouTube video. An empty value or ``None`` is rejected.

    Returns:
        A ``(message, path)`` tuple: ``("Success", wav_path)`` on success,
        otherwise an error message and ``None``. Ordinary errors are reported
        through the message instead of being raised, so the result can be
        wired directly to a text output and an audio output.
    """
    if not url:
        return "Please enter the video link first", None

    video_output_folder = "yt_videos"  # Temporary download folder
    audio_output_folder = "audios"  # Destination folder for the WAV file
    video_file_path = None

    try:
        print(f"Converting video {url}...")

        # Download the audio-only stream using pytube.
        video = pytube.YouTube(url)
        stream = video.streams.filter(only_audio=True).first()
        if stream is None:
            # first() yields None when the query matched nothing; report it
            # clearly instead of failing later with an AttributeError.
            return "No audio stream found for this video.", None

        print("Downloading video")
        video_file_path = stream.download(output_path=video_output_folder)
        print(video_file_path)

        # Derive the output name from the real extension so non-mp4 downloads
        # still end in ".wav" and a ".mp4" inside the title is left untouched.
        stem, ext = os.path.splitext(os.path.basename(video_file_path))
        source_format = ext.lstrip(".").lower() or "mp4"

        # The export fails if the destination folder does not exist yet.
        os.makedirs(audio_output_folder, exist_ok=True)
        audio_file_path = os.path.join(audio_output_folder, stem + ".wav")

        print("Converting to wav")
        sound = AudioSegment.from_file(video_file_path, format=source_format)
        sound.export(audio_file_path, format="wav")

        return "Success", audio_file_path
    except ConnectionResetError:
        return "Connection lost, please refresh or try again later.", None
    except Exception as e:
        # Keep the stack trace in the server log; the caller only sees str(e).
        traceback.print_exc()
        return str(e), None
    finally:
        # Always discard the temporary download, including after a failure.
        if video_file_path and os.path.exists(video_file_path):
            try:
                os.remove(video_file_path)
            except OSError as cleanup_error:
                print(f"Could not remove {video_file_path}: {cleanup_error}")
# Build the Gradio interface. Components are rendered in creation order, so the
# statement order below is significant.
# NOTE(review): the nesting of rows/columns was reconstructed from statement
# order — confirm against the intended layout.
# The callbacks (infer, tts_infer, update_tts_methods_voice, search_model,
# post_model) are not defined in this file; they presumably come from
# `from models.model import *`.
_MODEL_URL_PLACEHOLDER = "https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip"

with gr.Blocks() as app:
    gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
    gr.HTML("<h4> This space uses CPU only, so it's for inference only. It's recommended to duplicate the space to avoid issues with processing queues. </h4>")
    # A Markdown link with empty text renders nothing, so give each link a label.
    gr.Markdown(
        "Simple RVC GPU Inference on colab: "
        "[Open in Colab](https://colab.research.google.com/drive/1NKqqTR04HujeBxzwe7jbYEvNi8LbxD_N?usp=sharing)"
    )
    gr.Markdown(
        "[Duplicate this Space](https://huggingface.co/spaces/juuxn/SimpleRVC?duplicate=true)\n\n"
    )
    gr.Markdown("Collection of models you can use: RVC + Kits ai. **[RVC Community Models](https://docs.google.com/spreadsheets/d/1owfUtQuLW9ReiIwg6U9UkkDmPOTkuNHf0OKQtWu1iaI)**")

    # --- Voice conversion of an uploaded/recorded audio file ---
    with gr.Tab("Inference"):
        model_url = gr.Textbox(placeholder=_MODEL_URL_PLACEHOLDER, label="Model URL", show_label=True)
        with gr.Row():
            with gr.Column():
                audio_path = gr.Audio(label="Audio file", show_label=True, type="filepath")
                index_rate = gr.Slider(minimum=0, maximum=1, label="Search feature ratio:", value=0.75, interactive=True)
                filter_radius1 = gr.Slider(minimum=0, maximum=7, label="Filter (reduce breath harshness)", value=3, step=1, interactive=True)
            with gr.Column():
                f0_method = gr.Dropdown(
                    choices=["harvest", "pm", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"],
                    value="rmvpe",
                    label="Algorithm",
                    show_label=True,
                )
                vc_transform0 = gr.Slider(minimum=-12, label="Number of semitones, raise an octave: 12, lower an octave: -12", value=0, maximum=12, step=1)
                protect0 = gr.Slider(
                    minimum=0,
                    maximum=0.5,
                    label="Protect voiceless consonants and breath sounds. 0.5 to disable.",
                    value=0.33,
                    step=0.01,
                    interactive=True,
                )
                resample_sr1 = gr.Slider(
                    minimum=0,
                    maximum=48000,
                    label="Resample output audio to the final sampling frequency. 0 to disable resampling.",
                    value=0,
                    step=1,
                    interactive=True,
                )

        # Output
        with gr.Row():
            vc_output1 = gr.Textbox(label="Output")
            vc_output2 = gr.Audio(label="Output audio")

        btn = gr.Button(value="Convert")
        btn.click(
            infer,
            inputs=[model_url, f0_method, audio_path, index_rate, vc_transform0, protect0, resample_sr1, filter_radius1],
            outputs=[vc_output1, vc_output2],
        )

    # --- Text-to-speech followed by voice conversion ---
    with gr.Tab("TTS"):
        with gr.Row():
            tts_text = gr.Textbox(
                label="Text:",
                placeholder="Text you want to convert to speech...",
                lines=6,
            )

        with gr.Column():
            with gr.Row():
                tts_model_url = gr.Textbox(placeholder=_MODEL_URL_PLACEHOLDER, label="RVC Model URL", show_label=True)
            with gr.Row():
                tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="TTS Method:", visible=True)
                tts_model = gr.Dropdown(choices=EDGE_VOICES, label="TTS Model:", visible=True, interactive=True)
                # The API key is a secret, so mask it while it is typed.
                tts_api_key = gr.Textbox(
                    label="ElevenLabs API key",
                    show_label=True,
                    placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",
                    interactive=True,
                    visible=False,
                    type="password",
                )
            tts_coqui_languages = gr.Radio(
                label="Language",
                choices=COQUI_LANGUAGES,
                value="en",
                visible=False,
            )

        tts_btn = gr.Button(value="Convert")

        with gr.Row():
            tts_vc_output1 = gr.Textbox(label="Output")
            tts_vc_output2 = gr.Audio(label="Output audio")

        tts_btn.click(
            fn=tts_infer,
            inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages],
            outputs=[tts_vc_output1, tts_vc_output2],
        )

        # Created hidden; it is one of the outputs of the method-change handler below.
        tts_msg = gr.Markdown(
            "**I recommend creating an Eleven Labs account and adding your API key. It’s free, and you get a 10k character limit per month.** <br/>\n\n",
            visible=False,
        )

        # Changing the TTS method updates the voice list, the hint, the API key
        # field and the language selector through a single callback.
        tts_method.change(
            fn=update_tts_methods_voice,
            inputs=[tts_method],
            outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages],
        )

    # --- YouTube audio extraction ---
    with gr.Tab("YouTube"):
        gr.Markdown("## Convert YouTube video to audio")
        with gr.Row():
            yt_url = gr.Textbox(
                label="Video URL:",
                placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q",
            )
        yt_btn = gr.Button(value="Convert")

        with gr.Row():
            yt_output1 = gr.Textbox(label="Output")
            yt_output2 = gr.Audio(label="Output audio")

        yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2])

    # --- Community model search and publishing ---
    with gr.Tab("Models"):
        gr.HTML("<h4>Search Models</h4>")
        search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Name", show_label=True)

        # Output
        with gr.Row():
            search_output = gr.Markdown(label="Output")

        btn_search_model = gr.Button(value="Search")
        btn_search_model.click(fn=search_model, inputs=[search_name], outputs=[search_output])

        gr.HTML("<h4>Publish Your Model</h4>")
        post_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Name", show_label=True)
        post_model_url = gr.Textbox(placeholder=_MODEL_URL_PLACEHOLDER, label="Model URL", show_label=True)
        post_creator = gr.Textbox(placeholder="Discord ID or creator profile link", label="Creator", show_label=True)
        post_version = gr.Dropdown(choices=["RVC v1", "RVC v2"], value="RVC v1", label="Version", show_label=True)

        # Output
        with gr.Row():
            post_output = gr.Markdown(label="Output")

        btn_post_model = gr.Button(value="Publish")
        btn_post_model.click(
            fn=post_model,
            inputs=[post_name, post_model_url, post_version, post_creator],
            outputs=[post_output],
        )

    gr.Markdown(
        """For commercial use of the models and spaces, consider purchasing a license, or negotiate one with the voice creators."""
    )
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # Enable the request queue, then launch the app with debug output enabled.
    app.queue().launch(debug=True)