diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..77c87f7c3b129142089398d23fa12e8470cdf25f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,39 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +*.mp3 filter=lfs diff=lfs merge=lfs -text +*.mp4 filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 
def get_sorted_filenames_in_dir(dir_path: str, ext: str = ".jpg", throw_if_empty: bool = True) -> list:
    """Return the sorted paths of the files in the specified directory that end with `ext`.

    Args:
        dir_path: Directory to search (not recursive).
        ext: Filename suffix to match, including the leading dot.
        throw_if_empty: When the directory contains no entries at all, raise if True,
            otherwise return an empty list.

    Returns:
        Sorted list of matching paths, each prefixed with `dir_path` as given.
        The list may be empty when the directory has entries but none match `ext`.

    Raises:
        RuntimeError: If `dir_path` is missing or is not a directory, or if it is
            empty and `throw_if_empty` is True.
    """
    p = Path(dir_path)
    # `is_dir()` is False for missing paths too, so this single check rejects both
    # "does not exist" and "exists but is a regular file".
    if not p.is_dir():
        raise RuntimeError(f"The path: {dir_path} does not exist or is not a directory")

    if not os.listdir(dir_path):
        message = f"The path: {dir_path} is empty"
        if throw_if_empty:
            raise RuntimeError(message)
        return []

    # Escape the directory part so characters such as "[" or "*" in its name are
    # matched literally; only the "*" before the extension acts as a wildcard.
    search_string = glob.escape(str(dir_path)) + "/*" + ext
    return sorted(glob.glob(search_string))
+ Sign up with Two AI to gain rapid, long-form generation, API keys, and more!""" + +# Core constants +tmp_dir = "/tmp/gradio" +data_dir = "./data" +male_key = "male" +female_key = "female" +unknown_key = "unknown" +media_height = 512 + +# Male/Female +female_terms = ["Female", "Lady", "Woman"] +male_terms = ["Male", "Lad", "Man"] + +# Elevenlabs Voices # +all_voices = ElevenLabsHelper.get_voices() +voices_ = [voice for voice in all_voices.voices if len(voice.name.split(" ")) < 2 and len(voice.name) < 10] +female_voice_names = ElevenLabsHelper.select_voices(voices_, labels={"gender": female_key, "age": "young"}) +male_voice_names = ElevenLabsHelper.select_voices(voices_, labels={"gender": male_key, "age": "young"}) +male_voice_names.remove("Priya") +voices = { + female_key: female_voice_names, + male_key: male_voice_names, + unknown_key: female_voice_names + male_voice_names, +} + +# Examples +# Base Images +example_base_image_dir = os.path.join(data_dir, "input_image_bases") +example_base_images = { + female_key: get_sorted_filenames_in_dir(os.path.join(example_base_image_dir, female_key), ext=".jpg"), + male_key: get_sorted_filenames_in_dir(os.path.join(example_base_image_dir, male_key), ext=".jpg"), +} + +# Base Videos +example_base_video_dir = os.path.join(data_dir, "input_video_bases") +example_source_videos = { + female_key: get_sorted_filenames_in_dir(os.path.join(example_base_video_dir, female_key), ext=".mp4"), + male_key: get_sorted_filenames_in_dir(os.path.join(example_base_video_dir, male_key), ext=".mp4"), +} + +# Driving Audio +example_driving_audio_dir = os.path.join(data_dir, "input_audio/gradio") +example_driving_audios_male = get_sorted_filenames_in_dir(os.path.join(example_driving_audio_dir, male_key), ext=".mp3") +example_driving_audios_female = get_sorted_filenames_in_dir( + os.path.join(example_driving_audio_dir, female_key), ext=".mp3" +) +example_driving_audios = {female_key: example_driving_audios_female, male_key: 
example_driving_audios_male} + +# Driving Text +audio_text_groups = ["General", "Promotional Messages", "Pronunciation Practice"] +example_driving_audio_texts = { + "General": [ + "The 2026 World Cup final match is in New York.", + "Enhance efficiency and cut costs with AI.", + "A bee's wings beat more than 200 times per second.", + "2026년 월드컵 결승전은 뉴욕에서 열립니다.", + "AI로 효율성을 높이고 비용을 절감하세요.", + "벌은 초당 200회 이상의 날개짓을 합니다.", + "2026 विश्व कप फाइनल मैच न्यूयॉर्क में होगा।", + "AI के साथ दक्षता बढ़ाएं और लागत कम करें।", + "मधुमक्खी के पंख सेकंड में 200 बार से अधिक फड़फड़ाते हैं।", + ], + "Promotional Messages": [ + "Welcome to our kiosk, where you can easily purchase tickets, or access various services by simply tapping the display!", + "Catch all the drama, emotion, and energy in my new film, now available on Netflix—it's a must-watch!", + "This season of IPL is full of surprises, and I’d love to see you supporting us as we fight for victory on the ground.", + "Transform your health with our latest fitness programs! Join us today and take the first step toward a stronger, energized you.", + ], + "Pronunciation Practice": [ + "A big black bug bit a big black dog on his big black nose.", + "Fuzzy Wuzzy was a bear. Fuzzy Wuzzy had no hair. 
def update_voices(media_path):
    """Build the voice dropdown that matches the gender hinted at by `media_path`.

    The path is searched for each female term first and then each male term, in both
    its listed spelling and its lowercase form. The first group with a hit decides the
    category; an empty path or no hit falls back to the "unknown" category.
    The returned dropdown offers that category's voices with the first one selected.
    """
    category = unknown_key
    if media_path:
        # Female terms are tried before male terms, so the first matching group wins.
        for key, terms in ((female_key, female_terms), (male_key, male_terms)):
            if any(term in media_path or term.lower() in media_path for term in terms):
                category = key
                break

    category_voices = voices[category]
    return gr.Dropdown(
        choices=category_voices,
        value=category_voices[0],
        interactive=True,
    )
examples_per_page=18, + label="Male", + ) + + with gr.TabItem("Driving Audio: TTS") as tab_audio_tts: + with gr.Accordion(open=True, label="Driving Audio: From Text"): + driving_input_voice = gr.Dropdown( + choices=voices[unknown_key], value=voices[unknown_key][0], label="Voice" + ) + driving_text_input = gr.Textbox( + label="Input Text (300 characters max)", + lines=2, + ) + for group in audio_text_groups: + gr.Examples( + examples=[[example] for example in example_driving_audio_texts[group]], + inputs=[driving_text_input], + cache_examples=False, + label=group, + ) + + # Step 3: Result + with gr.Column(scale=4): + gr.Markdown("### Step 3: Result") + gr.Markdown("Generate and view the output video.") + process_button_animation = gr.Button("🌟 Generate", variant="primary") + output_video_i2v = gr.Video(autoplay=True, label="The Output Video", height=media_height) + message = gr.Textbox(label="Info") + process_button_reset = gr.ClearButton( + [ + base_image_input, + driving_audio_input, + driving_text_input, + driving_input_voice, + output_video_i2v, + ], + value="🧹 Clear", + ) + + base_image_input.change(fn=update_voices, inputs=[base_image_input], outputs=[driving_input_voice]) + + # binding functions for buttons + process_button_animation.click( + fn=task_executor_fn, + inputs=[ + base_image_input, + base_motion_expression, + driving_audio_input, + driving_text_input, + driving_input_voice, + ], + outputs=[output_video_i2v, output_video_i2v, message], + show_progress=True, + ) + +with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo_video: + with gr.Row(): + # Step 1: Choose Video + with gr.Column(scale=4): + gr.Markdown("### Step 1: Choose Video") + gr.Markdown("Upload or select an example video to drive.") + with gr.Accordion(open=True, label="Base Video"): + base_video_input = gr.Video(sources="upload", height=media_height, interactive=True) + gr.Examples( + examples=[[example] for example in 
example_source_videos[female_key]], + inputs=[base_video_input], + cache_examples=False, + label="Female", + ) + gr.Examples( + examples=[[example] for example in example_source_videos[male_key]], + inputs=[base_video_input], + cache_examples=False, + label="Male", + ) + + # Step 2: Audio/TTS + with gr.Column(scale=4): + gr.Markdown("### Step 2: Audio/TTS") + gr.Markdown("Provide audio or text for lip-sync.") + with gr.Tabs(): + with gr.TabItem("Driving Audio: File") as tab_audio_file: + with gr.Accordion(open=True, label="Driving Audio: From File"): + driving_audio_input = gr.Audio(sources=["upload"], type="filepath") + gr.Examples( + examples=[[example] for example in example_driving_audios[female_key]], + inputs=[driving_audio_input], + cache_examples=False, + examples_per_page=18, + label="Female", + ) + gr.Examples( + examples=[[example] for example in example_driving_audios[male_key]], + inputs=[driving_audio_input], + cache_examples=False, + examples_per_page=18, + label="Male", + ) + with gr.TabItem("Driving Audio: TTS") as tab_audio_tts: + with gr.Accordion(open=True, label="Driving Audio: From Text"): + driving_input_voice = gr.Dropdown( + choices=voices[unknown_key], value=voices[unknown_key][0], label="Voice" + ) + driving_text_input = gr.Textbox( + label="Input Text (300 characters max)", + lines=2, + ) + for group in audio_text_groups: + gr.Examples( + examples=[[example] for example in example_driving_audio_texts[group]], + inputs=[driving_text_input], + cache_examples=False, + label=group, + ) + # Step 3: Result + with gr.Column(scale=4): + gr.Markdown("### Step 3: Result") + gr.Markdown("Generate and view the output video.") + process_button_animation = gr.Button("🌟 Generate", variant="primary") + output_video_i2v = gr.Video(autoplay=True, label="The Output Video", height=media_height) + message = gr.Textbox(label="Info") + process_button_reset = gr.ClearButton( + [base_video_input, driving_audio_input, driving_text_input, driving_input_voice, 
output_video_i2v], + value="🧹 Clear", + ) + + base_video_input.change(fn=update_voices, inputs=[base_video_input], outputs=[driving_input_voice]) + + # binding functions for buttons + base_motion_expression = gr.Radio(value=None, visible=False) + process_button_animation.click( + fn=task_executor_fn, + inputs=[ + base_video_input, + base_motion_expression, + driving_audio_input, + driving_text_input, + driving_input_voice, + ], + outputs=[output_video_i2v, output_video_i2v, message], + show_progress=True, + ) + +with gr.Blocks() as showcase_examples: + gr.Markdown("# Make Image Talk") + with gr.Row(): + with gr.Column(scale=7): + for path in examples_showcase["make_image_talk_multilingual"]: + gr.Video(value=path, label=os.path.basename(path), height=300) + with gr.Column(scale=3): + for path in examples_showcase["make_image_talk_cartoon"]: + gr.Video(value=path, label=os.path.basename(path), height=616) + with gr.Row(): + with gr.Column(scale=7): + for path in examples_showcase["make_image_talk_diff_angles"]: + gr.Video(value=path, label=os.path.basename(path), height=350) + with gr.Column(scale=3): + for path in examples_showcase["make_image_talk_hb"]: + gr.Video(value=path, label=os.path.basename(path), height=350) + with gr.Row(): + for path in examples_showcase['make_image_talk_selfie']: + gr.Video(value=path, label=os.path.basename(path), height=430) + + gr.Markdown("# Make Video Talk") + with gr.Row(): + with gr.Column(scale=7): + for path in examples_showcase["make_video_talk_multilingual"]: + gr.Video(value=path, label=os.path.basename(path), height=300) + with gr.Column(scale=3): + for path in examples_showcase["make_video_talk_corp_msg"]: + gr.Video(value=path, label=os.path.basename(path), height=616) + with gr.Row(): + for path in examples_showcase["make_video_talk_rap_multii"]: + gr.Video(value=path, label=os.path.basename(path), height=500) + + gr.Markdown("# Dubbing") + with gr.Row(): + for path in examples_showcase["dubbing_superpowerman"]: + 
if __name__ == "__main__":
    # No options are defined, but parsing is kept so `--help` works and unknown
    # command-line arguments are rejected.
    parser = argparse.ArgumentParser(description="SUTRA AVATAR CLIENT")
    args = parser.parse_args()

    # Module-level name read by `task_executor_fn` when a Generate button is clicked.
    task_executor = CloudTaskExecutor()

    demo.queue(default_concurrency_limit=10).launch(
        # Listen on all interfaces.
        server_name="0.0.0.0",
        # Only the bundled example media and the scratch directory may be served.
        # The previous value ["/"] allowed every file readable by this process to be
        # served to any client that can reach the server.
        allowed_paths=[os.path.abspath(data_dir), tmp_dir],
    )
def generate_random_integer(num_digits):
    """Return a random integer in the inclusive range [0, 10**num_digits - 1].

    The value comes from `random.SystemRandom`, which draws from the operating
    system's entropy source. Unlike reseeding the module-level generator from the
    millisecond clock, this neither returns the same value for calls made within the
    same millisecond nor disturbs the state of the shared `random` generator.

    Args:
        num_digits: Maximum number of decimal digits of the result.

    Returns:
        A non-negative int with at most `num_digits` digits.
    """
    upper_bound = (10**num_digits) - 1
    return random.SystemRandom().randint(0, upper_bound)
def clean(self, output_dir):
    """Remove `output_dir` and everything below it; do nothing if it is not a directory."""
    if not os.path.isdir(output_dir):
        return
    shutil.rmtree(output_dir)
def make_remote_media_path(request_id, media_path):
    """Build the remote object path under which `media_path` is uploaded.

    The request id is split into three segments: characters 0-2, characters 3-5 and
    the remainder. These become the directory components, followed by the media
    file's own name (including its extension).

    Args:
        request_id: Request identifier; must be longer than 6 characters so that the
            third segment is not empty.
        media_path: Path of an existing local media file.

    Returns:
        "<id[:3]>/<id[3:6]>/<id[6:]>/<file name>".

    Raises:
        ValueError: If `request_id` has 6 characters or fewer.
        FileNotFoundError: If `media_path` does not exist.
    """
    # Local import: only this function needs it.
    import posixpath

    # Explicit checks instead of `assert`, which is stripped when Python runs with -O.
    if len(request_id) <= 6:
        raise ValueError(f"request_id must be longer than 6 characters, got {request_id!r}")
    if not os.path.exists(media_path):
        raise FileNotFoundError(f"Media file does not exist: {media_path}")

    src_id = request_id[:3]
    slot_id = request_id[3:6]
    request_suffix = request_id[6:]
    file_name = os.path.basename(os.path.abspath(media_path))
    # posixpath always joins with "/", so the result is the same on every host OS.
    return posixpath.join(src_id, slot_id, request_suffix, file_name)
def generate(
    self,
    input_base_path,
    input_driving_path,
    base_motion_expression,
    input_driving_audio_path,
    output_video_path,
    request_id,
):
    """Upload the inputs, submit the task, and poll until it finishes or times out.

    Args:
        input_base_path: Local path of the base image or video; uploaded to the bucket.
        input_driving_path: Accepted for interface parity; the request always sends "".
        base_motion_expression: Base motion name forwarded to the service.
        input_driving_audio_path: Local path of the driving audio, or "" for none.
        output_video_path: Requested output path; only its basename is sent.
        request_id: Identifier used for the upload location and for status polling.

    Returns:
        A `(result, output_video_path)` tuple. `result` is a dict with a boolean
        "success" and a "messages" string. On success `output_video_path` is replaced
        by the "videoURL" reported by the service; otherwise it is returned unchanged.
    """
    # Upload every input that was actually provided.
    for media_path in (input_base_path, input_driving_audio_path):
        if media_path:
            remote_media_path = make_remote_media_path(request_id, media_path)
            copy_to_gcloud(self.storage_client, media_path, self.bucket_name, remote_media_path)

    submit_request = {
        "requestId": request_id,
        "input_base_path": ntpath.basename(input_base_path),
        "input_driving_path": "",
        "base_motion_expression": base_motion_expression,
        "input_driving_audio_path": ntpath.basename(input_driving_audio_path),
        "output_video_path": ntpath.basename(output_video_path),
    }
    submit_reply = self.submit_task(submit_request)

    completion_statuses = {"Succeeded", "Cancelled", "Failed", "NotFound"}
    timeout = 240  # maximum time to wait in seconds
    # Extend the budget by the service's estimate when it is numeric
    # (bool is excluded because it is a subclass of int).
    estimated_wait = submit_reply.get("estimatedWaitSeconds", "unknown")
    if isinstance(estimated_wait, (int, float)) and not isinstance(estimated_wait, bool):
        timeout += estimated_wait
    # Monotonic clock: unaffected by system clock adjustments during the wait.
    deadline = time.monotonic() + timeout

    result = {"messages": ""}
    while True:
        status_reply = self.get_task_status(request_id)
        if status_reply["taskStatus"] in completion_statuses:
            break

        if time.monotonic() > deadline:
            msg = "The task did not complete within the timeout period.\n The server is very busy serving other requests.\n Please try again."
            result["success"] = False
            result["messages"] = msg
            # gr.Error only has an effect when raised, which would bypass the
            # (result, path) contract the caller relies on; show a warning instead.
            gr.Warning(msg)
            break
        time.sleep(3)

    if status_reply["taskStatus"] == "Succeeded":
        pipe_reply = status_reply["pipeReply"]
        result["success"] = pipe_reply["status"] == "success"
        result["messages"] = pipe_reply["messages"]
        output_video_path = status_reply["videoURL"]
    else:
        # Failure, cancellation, not-found or timeout: append any message from the
        # service to what was recorded above.
        messages = ""
        if "pipeReply" in status_reply:
            messages = status_reply["pipeReply"]["messages"]
        result["success"] = False
        result["messages"] += messages
    return result, output_video_path
71053 diff --git a/data/input_audio/gradio/female/hi-BeesWingsBeat-Matilda.mp3 b/data/input_audio/gradio/female/hi-BeesWingsBeat-Matilda.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..df110b3d188c252bceab2c273cec61eeaf8c49d9 --- /dev/null +++ b/data/input_audio/gradio/female/hi-BeesWingsBeat-Matilda.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:981852faccc81eccf82effc8ad3a2bef134c447c038ec15c4c7ff418c1a40c25 +size 57678 diff --git a/data/input_audio/gradio/female/hi-EnhanceEfficiency-Matilda.mp3 b/data/input_audio/gradio/female/hi-EnhanceEfficiency-Matilda.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..69e401d75838790f1e910a5842b1a63ac7073fc6 --- /dev/null +++ b/data/input_audio/gradio/female/hi-EnhanceEfficiency-Matilda.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:568d0dd0fad0648e711fa50e0c048cad18df52e03a87503ff382379686acf89b +size 48065 diff --git a/data/input_audio/gradio/female/hi-The2026WorldCup-Matilda.mp3 b/data/input_audio/gradio/female/hi-The2026WorldCup-Matilda.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c74aad4107ddb360538aa8fd53d14a4ab8fb0afb --- /dev/null +++ b/data/input_audio/gradio/female/hi-The2026WorldCup-Matilda.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a304d592f4d2b10a91f7b82b25416813ca891b50e64fb513aa7f3cf1b8f0cd7c +size 53498 diff --git a/data/input_audio/gradio/female/ko-BeesWingsBeat-Jinju.mp3 b/data/input_audio/gradio/female/ko-BeesWingsBeat-Jinju.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..6733af1a03d0802464cce5a7cd8bb137d66eb551 --- /dev/null +++ b/data/input_audio/gradio/female/ko-BeesWingsBeat-Jinju.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e97b106f1757b8f64cecb33ae9265eaba0dfa5a28bb6f27d1f42534937f203 +size 47229 diff --git a/data/input_audio/gradio/female/ko-EnhanceEfficiency-Jinju.mp3 
b/data/input_audio/gradio/female/ko-EnhanceEfficiency-Jinju.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..bd2b468d40c280419808d88438b35e497ec2065d --- /dev/null +++ b/data/input_audio/gradio/female/ko-EnhanceEfficiency-Jinju.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34144828e9499c22fa5d7be6a621aadea5f0a25d68dca04a6ad3b65f01dfa36d +size 48065 diff --git a/data/input_audio/gradio/female/ko-The2026WorldCup-Jinju.mp3 b/data/input_audio/gradio/female/ko-The2026WorldCup-Jinju.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f09cdd0d6d7053e2592a2a9792abf1b16fb4464e --- /dev/null +++ b/data/input_audio/gradio/female/ko-The2026WorldCup-Jinju.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60f6dc9a567be17f2edc9d4fa5e877a4025e7acabdc4260014612b420f7b2981 +size 57678 diff --git a/data/input_audio/gradio/male/en-BeesWingsBeat-Marcus.mp3 b/data/input_audio/gradio/male/en-BeesWingsBeat-Marcus.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c1cd1dee397d3e34cb5b8ebdddf95080a14ca6e3 --- /dev/null +++ b/data/input_audio/gradio/male/en-BeesWingsBeat-Marcus.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b677ad256f0d28d1c9c9afabb347d7b1520aadd1b0e19ca09665fe3b9a7adfed +size 46811 diff --git a/data/input_audio/gradio/male/en-EnhanceEfficiency-Marcus.mp3 b/data/input_audio/gradio/male/en-EnhanceEfficiency-Marcus.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..acc47e49025ce8a148308a08a0bdfd668c2a2c04 --- /dev/null +++ b/data/input_audio/gradio/male/en-EnhanceEfficiency-Marcus.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770cce3bbfca0913ceb8651584d6515c8f271bffb45d11e0f76ecf96af19e00a +size 40542 diff --git a/data/input_audio/gradio/male/en-The2026WorldCup-Marcus.mp3 b/data/input_audio/gradio/male/en-The2026WorldCup-Marcus.mp3 new file mode 100644 index 
0000000000000000000000000000000000000000..0a6e6039c11cb7efcac8da9ec57bb6eb23a48a41 --- /dev/null +++ b/data/input_audio/gradio/male/en-The2026WorldCup-Marcus.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f34c9f42944b8a76cc727c06f28556630d94a82304b37890919cb64d8cab51 +size 57260 diff --git a/data/input_audio/gradio/male/hi-BeesWingsBeat-Liam.mp3 b/data/input_audio/gradio/male/hi-BeesWingsBeat-Liam.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c24f9603d5ca1d227b8cabfb833fdde1beebf188 --- /dev/null +++ b/data/input_audio/gradio/male/hi-BeesWingsBeat-Liam.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f767e72ca739f8e3ba3edea24f5f9b533bfdbef37c60db02125dd1c18d54a1ef +size 64365 diff --git a/data/input_audio/gradio/male/hi-EnhanceEfficiency-Liam.mp3 b/data/input_audio/gradio/male/hi-EnhanceEfficiency-Liam.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..09caf097a689ebc267aa5a67059e2e0e950f5f48 --- /dev/null +++ b/data/input_audio/gradio/male/hi-EnhanceEfficiency-Liam.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcdb3e0776e8aa60778d97dc9a73beaa81b6b94a2b31cf4e34437fdc12233425 +size 50991 diff --git a/data/input_audio/gradio/male/hi-The2026WorldCup-Liam.mp3 b/data/input_audio/gradio/male/hi-The2026WorldCup-Liam.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..afbd1525b9e01b0e3fb752ee7940bfff27448e56 --- /dev/null +++ b/data/input_audio/gradio/male/hi-The2026WorldCup-Liam.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f292ae2e165b6fb713807888ab604848bf02f162f1621d47cd06bfc1926dd7f +size 54752 diff --git a/data/input_audio/gradio/male/ko-BeesWingsBeat-Noah.mp3 b/data/input_audio/gradio/male/ko-BeesWingsBeat-Noah.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..27e17df01bd3a5bd7a0f2d00da8592e07eae943f --- /dev/null +++ 
b/data/input_audio/gradio/male/ko-BeesWingsBeat-Noah.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bdcbdf30de7b6fbadd04099c08e47812311aeb1fcc5bb2c87ac4d92ab5d9a90 +size 47229 diff --git a/data/input_audio/gradio/male/ko-EnhanceEfficiency-Noah.mp3 b/data/input_audio/gradio/male/ko-EnhanceEfficiency-Noah.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..2f64bee773dcebdc510152b6fcff74140be3e0fa --- /dev/null +++ b/data/input_audio/gradio/male/ko-EnhanceEfficiency-Noah.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be33748c1b19c74abb6f2daaa343d4c5c2c5c8c00a7a03d2fbc20ca8e08ef9a6 +size 44303 diff --git a/data/input_audio/gradio/male/ko-The2026WorldCup-Noah.mp3 b/data/input_audio/gradio/male/ko-The2026WorldCup-Noah.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..2bdb563029d78b5570ce27423893c3164008aa47 --- /dev/null +++ b/data/input_audio/gradio/male/ko-The2026WorldCup-Noah.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a4ea9b5d46d6e419b59b875b0b84203170736a394e4eb676e7da70af8261d64 +size 58514 diff --git a/data/input_image_bases/female/01-Female-American_608.jpg b/data/input_image_bases/female/01-Female-American_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7d759449a3d59832557b1fdc3bd948cc17371097 --- /dev/null +++ b/data/input_image_bases/female/01-Female-American_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f78b7e0c7e81ed5aa6d94d6f7a4197921363ec1e35e69c1d38b9095be8469c +size 227792 diff --git a/data/input_image_bases/female/02-Female-Indian01_608.jpg b/data/input_image_bases/female/02-Female-Indian01_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..14396c7e216376f925c5547b77edcc24ec0abfa1 --- /dev/null +++ b/data/input_image_bases/female/02-Female-Indian01_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8743f2c7b9c5f09bacd5aca74ae983a2c83a9501e1af0c5b4765c33f80286b51 +size 211263 diff --git a/data/input_image_bases/female/03-Female-Korean_608.jpg b/data/input_image_bases/female/03-Female-Korean_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9182351543f5e7363cfb9f8f34d59da42d81bfdd --- /dev/null +++ b/data/input_image_bases/female/03-Female-Korean_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4b77e5db173fd080841145a82b2937fd5365f0ff1563762aed43789cbb865da +size 214450 diff --git a/data/input_image_bases/female/04-Female-Indian02_608.jpg b/data/input_image_bases/female/04-Female-Indian02_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..924b3c00fb293398d53999ab42fa7d16ba69664f --- /dev/null +++ b/data/input_image_bases/female/04-Female-Indian02_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0db7c64c33a13797a5c72b427df2803c45560a9a7cc606897a91f1b4a81aee69 +size 244752 diff --git a/data/input_image_bases/female/05-Female-European_608.jpg b/data/input_image_bases/female/05-Female-European_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b1915dad706b2571f7690ca4f623d407b724d6d5 --- /dev/null +++ b/data/input_image_bases/female/05-Female-European_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736a1a4f72f8c221bdae6a9cc438e1bb6058892353c8611d2cc4731eba2bfa0d +size 250090 diff --git a/data/input_image_bases/male/01-Male-Indian_608.jpg b/data/input_image_bases/male/01-Male-Indian_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..02b85ce6d5e7fd65c82e76360e79466099480975 --- /dev/null +++ b/data/input_image_bases/male/01-Male-Indian_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:816d36bbb50acbacebf74d0e0c9f1a9fe5b39c37d6f40c612a7b67dc02ffe772 +size 213975 diff --git a/data/input_image_bases/male/02-Male-Korean_608.jpg 
b/data/input_image_bases/male/02-Male-Korean_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..26ebb0f727469f6ef56e74c072c0a211c4402374 --- /dev/null +++ b/data/input_image_bases/male/02-Male-Korean_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0178b68fb104f30efb198ee96ac4ba41dbedf516fed306c73fd9548d68adb4fd +size 224990 diff --git a/data/input_image_bases/male/03-Male-European_608.jpg b/data/input_image_bases/male/03-Male-European_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b73b7b51df002a7920b84783b589f38c938d4823 --- /dev/null +++ b/data/input_image_bases/male/03-Male-European_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae049f326bdaa5d966285d2cbd8429dcb7c48ab91a55126b38781be65673b98 +size 272173 diff --git a/data/input_image_bases/male/04-Male-American_608.jpg b/data/input_image_bases/male/04-Male-American_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0edb0e5e0bca18e2aa25194324693abb05b320bf --- /dev/null +++ b/data/input_image_bases/male/04-Male-American_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a2d59c6418c2bb8490265d0fd261f2c1ec0e50e09fbf61abaef2e57ef870b8d +size 242395 diff --git a/data/input_image_bases/male/05-Male-AfricanAmerican_608.jpg b/data/input_image_bases/male/05-Male-AfricanAmerican_608.jpg new file mode 100644 index 0000000000000000000000000000000000000000..11a041cba76ee0847c7d3dd551f6d320248069cc --- /dev/null +++ b/data/input_image_bases/male/05-Male-AfricanAmerican_608.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b84bc585d8ea0d6303bac4b82ef587df6c1bf03c2e445474554c4c4abbc4bc4 +size 204953 diff --git a/data/input_video_bases/female/01-Female-Korean_608.mp4 b/data/input_video_bases/female/01-Female-Korean_608.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..c8aa5c9bfd324697e6891363a0b28e6422d490b6 --- /dev/null +++ b/data/input_video_bases/female/01-Female-Korean_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ecf7828e7d0f421767d190b3555868728b184edac1f4a0201820f1c58865d7c +size 2000776 diff --git a/data/input_video_bases/female/02-Female-Latina_608.mp4 b/data/input_video_bases/female/02-Female-Latina_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..11e6f7b211af423927aecc75706b6232ad3e0563 --- /dev/null +++ b/data/input_video_bases/female/02-Female-Latina_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6884cd58b987f02443d83b3faae37951aa33a689245c3bf65725f609c6303789 +size 2666194 diff --git a/data/input_video_bases/female/03-Female-European_608.mp4 b/data/input_video_bases/female/03-Female-European_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..75ff8d2898d4b12ee7349ca8fa66b6f1ef2b94ea --- /dev/null +++ b/data/input_video_bases/female/03-Female-European_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbde154264db6fbcb94e3c93c529b365f67e667473cc8a1445e0e9223ce6ea8b +size 1625368 diff --git a/data/input_video_bases/female/04-Female-Indian_608.mp4 b/data/input_video_bases/female/04-Female-Indian_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f62034e52cbcbf2a284c6472eac10b791497afc6 --- /dev/null +++ b/data/input_video_bases/female/04-Female-Indian_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3a358644c023f7cde032e5570d9b39b615b594d8ab6747456a2c60ac9a1f1c +size 1529791 diff --git a/data/input_video_bases/female/05-Female-American_608.mp4 b/data/input_video_bases/female/05-Female-American_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e80156702aebbde41dd22c4865b26a0c72c0b2bb --- /dev/null +++ 
b/data/input_video_bases/female/05-Female-American_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a91366a511a6b27f15edca2b5b6428e1ea3781971c9ac4202a34c49c0cef89 +size 1903512 diff --git a/data/input_video_bases/male/01-Male-Japanese_608.mp4 b/data/input_video_bases/male/01-Male-Japanese_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..efafdfea18a3e1f57364eab55e3319e9f9c94db2 --- /dev/null +++ b/data/input_video_bases/male/01-Male-Japanese_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9325107bacf0442932b74f88fc861a008fbbf4770f32074a0f818cc7f69c1759 +size 1770959 diff --git a/data/input_video_bases/male/02-Male-European_608.mp4 b/data/input_video_bases/male/02-Male-European_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..91c86b525e286e1e77b7b2b72538a3684000b2b4 --- /dev/null +++ b/data/input_video_bases/male/02-Male-European_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0eb1e61a0b6f22a4fcfd3acb90c5e661396678fcde7eca3edd394f1223483ea +size 1693659 diff --git a/data/input_video_bases/male/03-Male-American02_608.mp4 b/data/input_video_bases/male/03-Male-American02_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6fecedd71f3ab6db2e9d4659f21b57baea2a23a8 --- /dev/null +++ b/data/input_video_bases/male/03-Male-American02_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c9427293f6b721ac180f596b71ea4df1e5a5f5d3938f7ac9ac16df2007562f +size 1927639 diff --git a/data/input_video_bases/male/04-Male-Indian_608.mp4 b/data/input_video_bases/male/04-Male-Indian_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..411c8a9814330150bc3bff4cc2167fb5dd79782c --- /dev/null +++ b/data/input_video_bases/male/04-Male-Indian_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e12f8f6c70d602ad8c8f422ffd703a6c012b453d9902245b82b4ae0c051397d6 +size 1352685 diff --git a/data/input_video_bases/male/05-Male-American_608.mp4 b/data/input_video_bases/male/05-Male-American_608.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ae54aba44772aee7d6bd512721c1c98cb28db15c --- /dev/null +++ b/data/input_video_bases/male/05-Male-American_608.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448f993473f7a8291f8591856e15701c7e9bb373ddbf9e9c8a773d69b84601ac +size 1854230 diff --git a/data/showcase_examples/archive/01 Multilingual Female_720.mp4 b/data/showcase_examples/archive/01 Multilingual Female_720.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b3e847b71f5bfa9b51ebfa300211ac8b75fc7472 --- /dev/null +++ b/data/showcase_examples/archive/01 Multilingual Female_720.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67441bab5596482bfcb40c725c0829fb7b4df1a5642e43661b6553b20cefed2 +size 17771532 diff --git a/data/showcase_examples/archive/02 Multilingual Male_720.mp4 b/data/showcase_examples/archive/02 Multilingual Male_720.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..082678b117265eb9b55a39294a989e0bf3f96bb2 --- /dev/null +++ b/data/showcase_examples/archive/02 Multilingual Male_720.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82605475898eddb08165ec3429bb933e94a765d23c8c7a4ef1ecfa70363a4638 +size 13215459 diff --git a/data/showcase_examples/archive/02 Multilingual Male_720_IM.mp4 b/data/showcase_examples/archive/02 Multilingual Male_720_IM.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bcf284bac47e7457ff84ab8885abb04a6abf3801 --- /dev/null +++ b/data/showcase_examples/archive/02 Multilingual Male_720_IM.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d63481b053c30f05600791361914e9d2f7a17d003da56d1776f319622d8ec0a3 +size 17479793 diff --git 
a/data/showcase_examples/archive/03 Corporate Message_720.mp4 b/data/showcase_examples/archive/03 Corporate Message_720.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..a67df609f98fdc2bf334d3e189f4a731b2ee8de5 --- /dev/null +++ b/data/showcase_examples/archive/03 Corporate Message_720.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917db888f55ada94ee47b2f05a0ed2274f71d750b25f3c11ae5e9bc4b86a663c +size 2930433 diff --git a/data/showcase_examples/archive/04 Multi-Identities: Multilingual_720.mp4 b/data/showcase_examples/archive/04 Multi-Identities: Multilingual_720.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7fb7f08015e530fc026f94d5ad9085260580306c --- /dev/null +++ b/data/showcase_examples/archive/04 Multi-Identities: Multilingual_720.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414fd98b0816cbd2834353b87dcb3e3f41e3c47423c0b50040a79461c225f500 +size 5313472 diff --git a/data/showcase_examples/archive/05 Multi-Identities: Rap_720.mp4 b/data/showcase_examples/archive/05 Multi-Identities: Rap_720.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..60e52d87aff0c14939558b634d00f1bcfc531a4f --- /dev/null +++ b/data/showcase_examples/archive/05 Multi-Identities: Rap_720.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efcc23f689bc5067a30ab46efaa6d546c46cf422427dbb058fde6b8be066fbd3 +size 2556681 diff --git a/data/showcase_examples/archive/dubbing_heyjen/01 Original.mp4 b/data/showcase_examples/archive/dubbing_heyjen/01 Original.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2ef57f8551dd4126f8792868056780c374402a5f --- /dev/null +++ b/data/showcase_examples/archive/dubbing_heyjen/01 Original.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e694fed9e8452f8d0422fb0ff6a57891623bb717c990b26024cda21cc60772c3 +size 3046252 diff --git 
a/data/showcase_examples/archive/dubbing_heyjen/02 Hindi.mp4 b/data/showcase_examples/archive/dubbing_heyjen/02 Hindi.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..84ff888e1513b33ec190865fd1eb243a96d1fb36 --- /dev/null +++ b/data/showcase_examples/archive/dubbing_heyjen/02 Hindi.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1109a20e54bd244ad8762c068c64123d9e6a4e81d9e7137aa76d60258107afdf +size 5408513 diff --git a/data/showcase_examples/archive/dubbing_heyjen/03 Korean.mp4 b/data/showcase_examples/archive/dubbing_heyjen/03 Korean.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2708bfd5f3a60ac535af54addae483e9861e8dd6 --- /dev/null +++ b/data/showcase_examples/archive/dubbing_heyjen/03 Korean.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26934012f35a53d8e406b86eaf0ebd0328ebc447c13e6c976e42ca41458661d0 +size 5310368 diff --git a/data/showcase_examples/archive/dubbing_heyjen/04 Italian.mp4 b/data/showcase_examples/archive/dubbing_heyjen/04 Italian.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4db8142f5b7d6df44c71613bf7cfe1b78dc27583 --- /dev/null +++ b/data/showcase_examples/archive/dubbing_heyjen/04 Italian.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b31ce8b3798ac20a727e0ca47358fd2918559501569cc9650d74f0b582a3a601 +size 5299090 diff --git a/data/showcase_examples/dubbing_coffee/01 Coffee : Original.mp4 b/data/showcase_examples/dubbing_coffee/01 Coffee : Original.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d2f9003e97c7bb8cf017434a0c40a78c5dc76176 --- /dev/null +++ b/data/showcase_examples/dubbing_coffee/01 Coffee : Original.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8cdf985774bebc18942a0a3f6800ead76130cb254e912b3c42d7114b037357c +size 7701148 diff --git a/data/showcase_examples/dubbing_coffee/02 Coffee : Hindi.mp4 
b/data/showcase_examples/dubbing_coffee/02 Coffee : Hindi.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..110631b42731d98a68d66280441f70d351d85ecf --- /dev/null +++ b/data/showcase_examples/dubbing_coffee/02 Coffee : Hindi.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b739dfed86260ae273cc57224841256b92ca940f7fed1c2a11721aec3aacd2 +size 8293425 diff --git a/data/showcase_examples/dubbing_coffee/03 Coffee : Italian.mp4 b/data/showcase_examples/dubbing_coffee/03 Coffee : Italian.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..13615496e9c49fbb74b31f9314c3d7b6778ae251 --- /dev/null +++ b/data/showcase_examples/dubbing_coffee/03 Coffee : Italian.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debd28c28c2d389f9a8f692aca019a69d3231f1ffd7f6191e6a6465df1dd075b +size 8322711 diff --git a/data/showcase_examples/dubbing_coffee/04 Coffee : Korean.mp4 b/data/showcase_examples/dubbing_coffee/04 Coffee : Korean.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2d7e2c376b4455b8f7c0f57e9396a6418c3c0cb9 --- /dev/null +++ b/data/showcase_examples/dubbing_coffee/04 Coffee : Korean.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f3f38626a554ce59475e57289e7ac0fe545a411a40483513c3e6f19e662f19 +size 8308263 diff --git a/data/showcase_examples/dubbing_superpowerman/01 Superpower Man : Original.mp4 b/data/showcase_examples/dubbing_superpowerman/01 Superpower Man : Original.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f8d05890fd29f7ce37b0c40e0282bdbadbe75ba4 --- /dev/null +++ b/data/showcase_examples/dubbing_superpowerman/01 Superpower Man : Original.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c735965de6544fe7dd0e1c9ab6f5b3b57e0dd37fb5d2f6dc044d0b596668487 +size 10632376 diff --git a/data/showcase_examples/dubbing_superpowerman/02 Superpower Man : Spanish.mp4 
b/data/showcase_examples/dubbing_superpowerman/02 Superpower Man : Spanish.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..511086fb88ca162f8e2733fb5f14601f07ac10d7 --- /dev/null +++ b/data/showcase_examples/dubbing_superpowerman/02 Superpower Man : Spanish.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f04c86dc2034a94422d8197d3a012e59abc18f92f88c27e6f42a7e0ac18ad08 +size 3776826 diff --git a/data/showcase_examples/dubbing_superpowerman/03 Superpower Man : Hindi.mp4 b/data/showcase_examples/dubbing_superpowerman/03 Superpower Man : Hindi.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1aa88d812eef14e2c7551a0d685111856942579e --- /dev/null +++ b/data/showcase_examples/dubbing_superpowerman/03 Superpower Man : Hindi.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fed3c8c7a94e174e0f8403aae45c5f5daf80cf3cd1e5840476f3266ca1438fc +size 3790191 diff --git a/data/showcase_examples/make_image_talk_cartoon/03 Cartoon.mp4 b/data/showcase_examples/make_image_talk_cartoon/03 Cartoon.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..847001ea4607a91e7ddeff2eb1cb8087446716d7 --- /dev/null +++ b/data/showcase_examples/make_image_talk_cartoon/03 Cartoon.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff706a3f5afab5a0730c1083829188192be81aa7202766df054d43cb178128d6 +size 1058673 diff --git a/data/showcase_examples/make_image_talk_diff_angles/04 Multiple Facial Angles.mp4 b/data/showcase_examples/make_image_talk_diff_angles/04 Multiple Facial Angles.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..38ed021db76f1c86b5f0f222d541ada325c3b9f6 --- /dev/null +++ b/data/showcase_examples/make_image_talk_diff_angles/04 Multiple Facial Angles.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5803b63cc873ada7695ff69b971a800af39e9e75eac213b2f2d83ce03d11637 +size 1323372 diff --git 
a/data/showcase_examples/make_image_talk_hb/05 Happy Birthday Message to Friend.mp4 b/data/showcase_examples/make_image_talk_hb/05 Happy Birthday Message to Friend.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4ea91134d1b5f137a31e6574a7bc3cc0ae8095c3 --- /dev/null +++ b/data/showcase_examples/make_image_talk_hb/05 Happy Birthday Message to Friend.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa434586b779bea056d54b2e515b2aa6f5ae58ca6cea4fa1ed783aa5d8936d2a +size 1682531 diff --git a/data/showcase_examples/make_image_talk_multilingual/01 Multilingual Female.mp4 b/data/showcase_examples/make_image_talk_multilingual/01 Multilingual Female.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d2b32e9c189bff73729c41ed4a96d400a094bd40 --- /dev/null +++ b/data/showcase_examples/make_image_talk_multilingual/01 Multilingual Female.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0dc0143b5b77408590aadeaf3f4d46d1881d557ca82ffae676a867466e49c54 +size 7745545 diff --git a/data/showcase_examples/make_image_talk_multilingual/02 Multilingual Male.mp4 b/data/showcase_examples/make_image_talk_multilingual/02 Multilingual Male.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7ac2be62c9f0810a075dde34585cd529ba53199a --- /dev/null +++ b/data/showcase_examples/make_image_talk_multilingual/02 Multilingual Male.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ea7be9e49418776f5cdad9ce1a58138f3a6680add57b2e65b832a05613d471 +size 9625888 diff --git a/data/showcase_examples/make_image_talk_selfie/01 Selfie: Talk.mp4 b/data/showcase_examples/make_image_talk_selfie/01 Selfie: Talk.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0ae38e4536d0fb60bcbbc2bc8aefb616d01cde7c --- /dev/null +++ b/data/showcase_examples/make_image_talk_selfie/01 Selfie: Talk.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:187f15defea7f8b803b255d4ae75563b7a500b458ee167d25b5f7e01d16f6fc0 +size 668715 diff --git a/data/showcase_examples/make_image_talk_selfie/02 Selfie: Suprised.mp4 b/data/showcase_examples/make_image_talk_selfie/02 Selfie: Suprised.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..dac3a00945b37c0c1ff6c7c400e064e3ea6db5da --- /dev/null +++ b/data/showcase_examples/make_image_talk_selfie/02 Selfie: Suprised.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f19ae3a4d347b246723b8886308af82e2c36bd9790b1e18f1f65a3d7487c8cf +size 648385 diff --git a/data/showcase_examples/make_image_talk_selfie/03 Selfie: Talk.mp4 b/data/showcase_examples/make_image_talk_selfie/03 Selfie: Talk.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..99708630e0aecfe117ccb895f034cffdb8d09cd2 --- /dev/null +++ b/data/showcase_examples/make_image_talk_selfie/03 Selfie: Talk.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0960d1af6f1a6c228907669e4c16f7821070486a88b3000fface3bebb2045e55 +size 1588248 diff --git a/data/showcase_examples/make_image_talk_selfie/04 Selfie: Confused.mp4 b/data/showcase_examples/make_image_talk_selfie/04 Selfie: Confused.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..12ec12edc03c9d44ae15aab97c66b2b8227287d1 --- /dev/null +++ b/data/showcase_examples/make_image_talk_selfie/04 Selfie: Confused.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29fb667bbcf07e3837a6482835e4c6f224d9e698e3aede0c3e00ec16be9700a1 +size 693799 diff --git a/data/showcase_examples/make_video_talk_corp_msg/03 Corporate Message.mp4 b/data/showcase_examples/make_video_talk_corp_msg/03 Corporate Message.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..8829166155759815004e2f85ec415ec5086f2b9e --- /dev/null +++ b/data/showcase_examples/make_video_talk_corp_msg/03 Corporate Message.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:116f531a519f94a03a3a706e145a457438d2e80acbdbe067a8b1c0d9692e0703 +size 2023374 diff --git a/data/showcase_examples/make_video_talk_multilingual/01 Multilingual Female.mp4 b/data/showcase_examples/make_video_talk_multilingual/01 Multilingual Female.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..da608e764c8193f1c41c4c9da0371f346a81dca0 --- /dev/null +++ b/data/showcase_examples/make_video_talk_multilingual/01 Multilingual Female.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb688eee1c4a79421e38f4beb25ee2290911438da19e8e5c79555e36c2103b0d +size 12867128 diff --git a/data/showcase_examples/make_video_talk_multilingual/02 Multilingual Male.mp4 b/data/showcase_examples/make_video_talk_multilingual/02 Multilingual Male.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..8a445549e005f5fd622e4884b79ec42a5f2a9c11 --- /dev/null +++ b/data/showcase_examples/make_video_talk_multilingual/02 Multilingual Male.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91d952c04e83cc68198cffa88b66aa9686a17df4361afdce5190286c4c831cef +size 13152523 diff --git a/data/showcase_examples/make_video_talk_rap_multii/04 Multi-Identities: Multilingual.mp4 b/data/showcase_examples/make_video_talk_rap_multii/04 Multi-Identities: Multilingual.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6afa0618d902d2557981367d40a6aa56db0dad89 --- /dev/null +++ b/data/showcase_examples/make_video_talk_rap_multii/04 Multi-Identities: Multilingual.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c73acf0aa5de87cda7e8a494655f81bad0bac3b6c0e716a9a0beb99c1fe763d +size 3416069 diff --git a/data/showcase_examples/make_video_talk_rap_multii/05 Multi-Identities: Rap.mp4 b/data/showcase_examples/make_video_talk_rap_multii/05 Multi-Identities: Rap.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..6e1df4fe7a6adaefff7a96d8f3e6675a573732c0 --- /dev/null +++ b/data/showcase_examples/make_video_talk_rap_multii/05 Multi-Identities: Rap.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a6c0067bf028c912602e0b15c0a6bc1e3551a6bafccf748b5a9e0e80b6854b0 +size 1512493 diff --git a/data/showcase_examples/make_video_talk_rap_multii/06 Halloween Party Invitation.mp4 b/data/showcase_examples/make_video_talk_rap_multii/06 Halloween Party Invitation.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..da1eb393d0f68e46e12958285ace5d2704db3472 --- /dev/null +++ b/data/showcase_examples/make_video_talk_rap_multii/06 Halloween Party Invitation.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174d81596f7526095b539d6026cb7b877b602833b0b8c6d109a659b9e91cc529 +size 1909804 diff --git a/elevenlabs_helper.py b/elevenlabs_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..4bd89335a1be6f7e27876ddb4044b851e97331d7 --- /dev/null +++ b/elevenlabs_helper.py @@ -0,0 +1,41 @@ +import numpy as np +from elevenlabs import save +from elevenlabs.client import ElevenLabs + +client = ElevenLabs() + + +class ElevenLabsHelper: + @staticmethod + def pad_buffer(audio): + # Pad buffer to multiple of 2 bytes + buffer_size = len(audio) + element_size = np.dtype(np.int16).itemsize + if buffer_size % element_size != 0: + audio = audio + b"\0" * (element_size - (buffer_size % element_size)) + return audio + + @staticmethod + def generate_voice(text, voice_name, audio_output_path): + try: + audio = client.generate( + text=text[:300], voice=voice_name, model="eleven_multilingual_v2" # Limit to 300 characters + ) + save(audio, audio_output_path) + except Exception as e: + raise RuntimeError(e) + + @staticmethod + def get_voices(): + return client.voices.get_all() + + @staticmethod + def select_voices(voices, labels): + result = [] + for voice in voices: + 
has_required_labels = True + for key, value in labels.items(): + has_required_labels = has_required_labels and key in voice.labels.keys() and voice.labels[key] == value + if has_required_labels: + result.append(voice.name) + return result diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b3925f7477973f03a12655775d4f49ec8c972c1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +gradio==5.3.0 +elevenlabs==1.8.1 +google-cloud-storage