Michael Sapienza commited on
Commit
ec17e66
·
0 Parent(s):

initial commit of sutra-avatar-v2

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +39 -0
  2. .gitignore +1 -0
  3. README.md +13 -0
  4. app.py +441 -0
  5. base_task_executor.py +179 -0
  6. cloud_task_executor.py +143 -0
  7. data/input_audio/gradio/female/en-BeesWingsBeat-Shelby.mp3 +3 -0
  8. data/input_audio/gradio/female/en-EnhanceEfficiency-Shelby.mp3 +3 -0
  9. data/input_audio/gradio/female/en-The2026WorldCup-Shelby.mp3 +3 -0
  10. data/input_audio/gradio/female/hi-BeesWingsBeat-Matilda.mp3 +3 -0
  11. data/input_audio/gradio/female/hi-EnhanceEfficiency-Matilda.mp3 +3 -0
  12. data/input_audio/gradio/female/hi-The2026WorldCup-Matilda.mp3 +3 -0
  13. data/input_audio/gradio/female/ko-BeesWingsBeat-Jinju.mp3 +3 -0
  14. data/input_audio/gradio/female/ko-EnhanceEfficiency-Jinju.mp3 +3 -0
  15. data/input_audio/gradio/female/ko-The2026WorldCup-Jinju.mp3 +3 -0
  16. data/input_audio/gradio/male/en-BeesWingsBeat-Marcus.mp3 +3 -0
  17. data/input_audio/gradio/male/en-EnhanceEfficiency-Marcus.mp3 +3 -0
  18. data/input_audio/gradio/male/en-The2026WorldCup-Marcus.mp3 +3 -0
  19. data/input_audio/gradio/male/hi-BeesWingsBeat-Liam.mp3 +3 -0
  20. data/input_audio/gradio/male/hi-EnhanceEfficiency-Liam.mp3 +3 -0
  21. data/input_audio/gradio/male/hi-The2026WorldCup-Liam.mp3 +3 -0
  22. data/input_audio/gradio/male/ko-BeesWingsBeat-Noah.mp3 +3 -0
  23. data/input_audio/gradio/male/ko-EnhanceEfficiency-Noah.mp3 +3 -0
  24. data/input_audio/gradio/male/ko-The2026WorldCup-Noah.mp3 +3 -0
  25. data/input_image_bases/female/01-Female-American_608.jpg +3 -0
  26. data/input_image_bases/female/02-Female-Indian01_608.jpg +3 -0
  27. data/input_image_bases/female/03-Female-Korean_608.jpg +3 -0
  28. data/input_image_bases/female/04-Female-Indian02_608.jpg +3 -0
  29. data/input_image_bases/female/05-Female-European_608.jpg +3 -0
  30. data/input_image_bases/male/01-Male-Indian_608.jpg +3 -0
  31. data/input_image_bases/male/02-Male-Korean_608.jpg +3 -0
  32. data/input_image_bases/male/03-Male-European_608.jpg +3 -0
  33. data/input_image_bases/male/04-Male-American_608.jpg +3 -0
  34. data/input_image_bases/male/05-Male-AfricanAmerican_608.jpg +3 -0
  35. data/input_video_bases/female/01-Female-Korean_608.mp4 +3 -0
  36. data/input_video_bases/female/02-Female-Latina_608.mp4 +3 -0
  37. data/input_video_bases/female/03-Female-European_608.mp4 +3 -0
  38. data/input_video_bases/female/04-Female-Indian_608.mp4 +3 -0
  39. data/input_video_bases/female/05-Female-American_608.mp4 +3 -0
  40. data/input_video_bases/male/01-Male-Japanese_608.mp4 +3 -0
  41. data/input_video_bases/male/02-Male-European_608.mp4 +3 -0
  42. data/input_video_bases/male/03-Male-American02_608.mp4 +3 -0
  43. data/input_video_bases/male/04-Male-Indian_608.mp4 +3 -0
  44. data/input_video_bases/male/05-Male-American_608.mp4 +3 -0
  45. data/showcase_examples/archive/01 Multilingual Female_720.mp4 +3 -0
  46. data/showcase_examples/archive/02 Multilingual Male_720.mp4 +3 -0
  47. data/showcase_examples/archive/02 Multilingual Male_720_IM.mp4 +3 -0
  48. data/showcase_examples/archive/03 Corporate Message_720.mp4 +3 -0
  49. data/showcase_examples/archive/04 Multi-Identities: Multilingual_720.mp4 +3 -0
  50. data/showcase_examples/archive/05 Multi-Identities: Rap_720.mp4 +3 -0
.gitattributes ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
37
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ *.jpg filter=lfs diff=lfs merge=lfs -text
39
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: sutra-avatar-v2
3
+ emoji: 🐨
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 5.3.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+
3
+ import argparse
4
+ import glob
5
+ import os
6
+ from pathlib import Path
7
+
8
+ import gradio as gr
9
+
10
+ from cloud_task_executor import CloudTaskExecutor
11
+ from elevenlabs_helper import ElevenLabsHelper
12
+
13
+ # ---
14
+ talk_key = "talk"
15
+ valid_base_motion_expressions = [
16
+ f"{talk_key}-head",
17
+ f"{talk_key}-neutral",
18
+ "smile",
19
+ "approve",
20
+ "disapprove",
21
+ "confused",
22
+ "sad",
23
+ "surprised",
24
+ ]
25
+
26
+
27
def get_default_base_motion_expression():
    """Return the default base-motion choice (the first valid entry)."""
    default, *_ = valid_base_motion_expressions
    return default
29
+
30
+
31
+ # ---
32
+
33
+
34
def get_sorted_filenames_in_dir(dir_path: str, ext: str = ".jpg", throw_if_empty: bool = True) -> list:
    """Return the sorted filenames in the specified directory.

    Args:
        dir_path: Directory to search; must exist and be a directory.
        ext: Extension filter, including the leading dot (e.g. ".jpg").
        throw_if_empty: If True, raise when the directory has no entries;
            otherwise return an empty list.

    Returns:
        Sorted list of matching file paths.

    Raises:
        RuntimeError: If ``dir_path`` is missing / not a directory, or is
            empty while ``throw_if_empty`` is True.
    """
    p = Path(dir_path)
    # BUG FIX: the original used `and`, so a path that exists but is NOT a
    # directory (e.g. a regular file) slipped past the check and made
    # os.listdir below raise an unhelpful NotADirectoryError instead.
    if not p.exists() or not p.is_dir():
        raise RuntimeError(f"The path: {dir_path} does not exist")

    if not os.listdir(dir_path):
        message = f"The path: {dir_path} is empty"
        if throw_if_empty:
            raise RuntimeError(message)
        return []

    search_string = str(dir_path) + "/*" + ext
    return sorted(glob.glob(search_string))
49
+
50
+
51
+ # ---
52
+
53
+
54
+ description = """Experience a demo of the world's most advanced Text/Audio To Video (TTV) system, crafted by Two AI.
55
+ Sign up with Two AI to gain rapid, long-form generation, API keys, and more!"""
56
+
57
+ # Core constants
58
+ tmp_dir = "/tmp/gradio"
59
+ data_dir = "./data"
60
+ male_key = "male"
61
+ female_key = "female"
62
+ unknown_key = "unknown"
63
+ media_height = 512
64
+
65
+ # Male/Female
66
+ female_terms = ["Female", "Lady", "Woman"]
67
+ male_terms = ["Male", "Lad", "Man"]
68
+
69
+ # Elevenlabs Voices #
70
+ all_voices = ElevenLabsHelper.get_voices()
71
+ voices_ = [voice for voice in all_voices.voices if len(voice.name.split(" ")) < 2 and len(voice.name) < 10]
72
+ female_voice_names = ElevenLabsHelper.select_voices(voices_, labels={"gender": female_key, "age": "young"})
73
+ male_voice_names = ElevenLabsHelper.select_voices(voices_, labels={"gender": male_key, "age": "young"})
74
+ male_voice_names.remove("Priya")
75
+ voices = {
76
+ female_key: female_voice_names,
77
+ male_key: male_voice_names,
78
+ unknown_key: female_voice_names + male_voice_names,
79
+ }
80
+
81
+ # Examples
82
+ # Base Images
83
+ example_base_image_dir = os.path.join(data_dir, "input_image_bases")
84
+ example_base_images = {
85
+ female_key: get_sorted_filenames_in_dir(os.path.join(example_base_image_dir, female_key), ext=".jpg"),
86
+ male_key: get_sorted_filenames_in_dir(os.path.join(example_base_image_dir, male_key), ext=".jpg"),
87
+ }
88
+
89
+ # Base Videos
90
+ example_base_video_dir = os.path.join(data_dir, "input_video_bases")
91
+ example_source_videos = {
92
+ female_key: get_sorted_filenames_in_dir(os.path.join(example_base_video_dir, female_key), ext=".mp4"),
93
+ male_key: get_sorted_filenames_in_dir(os.path.join(example_base_video_dir, male_key), ext=".mp4"),
94
+ }
95
+
96
+ # Driving Audio
97
+ example_driving_audio_dir = os.path.join(data_dir, "input_audio/gradio")
98
+ example_driving_audios_male = get_sorted_filenames_in_dir(os.path.join(example_driving_audio_dir, male_key), ext=".mp3")
99
+ example_driving_audios_female = get_sorted_filenames_in_dir(
100
+ os.path.join(example_driving_audio_dir, female_key), ext=".mp3"
101
+ )
102
+ example_driving_audios = {female_key: example_driving_audios_female, male_key: example_driving_audios_male}
103
+
104
+ # Driving Text
105
+ audio_text_groups = ["General", "Promotional Messages", "Pronunciation Practice"]
106
+ example_driving_audio_texts = {
107
+ "General": [
108
+ "The 2026 World Cup final match is in New York.",
109
+ "Enhance efficiency and cut costs with AI.",
110
+ "A bee's wings beat more than 200 times per second.",
111
+ "2026년 월드컵 결승전은 뉴욕에서 열립니다.",
112
+ "AI로 효율성을 높이고 비용을 절감하세요.",
113
+ "벌은 초당 200회 이상의 날개짓을 합니다.",
114
+ "2026 विश्व कप फाइनल मैच न्यूयॉर्क में होगा।",
115
+ "AI के साथ दक्षता बढ़ाएं और लागत कम करें।",
116
+ "मधुमक्खी के पंख सेकंड में 200 बार से अधिक फड़फड़ाते हैं।",
117
+ ],
118
+ "Promotional Messages": [
119
+ "Welcome to our kiosk, where you can easily purchase tickets, or access various services by simply tapping the display!",
120
+ "Catch all the drama, emotion, and energy in my new film, now available on Netflix—it's a must-watch!",
121
+ "This season of IPL is full of surprises, and I’d love to see you supporting us as we fight for victory on the ground.",
122
+ "Transform your health with our latest fitness programs! Join us today and take the first step toward a stronger, energized you.",
123
+ ],
124
+ "Pronunciation Practice": [
125
+ "A big black bug bit a big black dog on his big black nose.",
126
+ "Fuzzy Wuzzy was a bear. Fuzzy Wuzzy had no hair. Fuzzy Wuzzy wasn't very fuzzy, was he?",
127
+ ],
128
+ }
129
+
130
+ example_showcase_dir = os.path.join(data_dir, "showcase_examples")
131
+ examples_showcase = {
132
+ "make_image_talk_multilingual": get_sorted_filenames_in_dir(
133
+ os.path.join(example_showcase_dir, "make_image_talk_multilingual"), ext=".mp4"
134
+ ),
135
+ "make_image_talk_cartoon": get_sorted_filenames_in_dir(
136
+ os.path.join(example_showcase_dir, "make_image_talk_cartoon"), ext=".mp4"
137
+ ),
138
+ "make_image_talk_diff_angles": get_sorted_filenames_in_dir(
139
+ os.path.join(example_showcase_dir, "make_image_talk_diff_angles"), ext=".mp4"
140
+ ),
141
+ "make_image_talk_hb": get_sorted_filenames_in_dir(
142
+ os.path.join(example_showcase_dir, "make_image_talk_hb"), ext=".mp4"
143
+ ),
144
+ "make_video_talk_multilingual": get_sorted_filenames_in_dir(
145
+ os.path.join(example_showcase_dir, "make_video_talk_multilingual"), ext=".mp4"
146
+ ),
147
+ "make_video_talk_corp_msg": get_sorted_filenames_in_dir(
148
+ os.path.join(example_showcase_dir, "make_video_talk_corp_msg"), ext=".mp4"
149
+ ),
150
+ "make_video_talk_rap_multii": get_sorted_filenames_in_dir(
151
+ os.path.join(example_showcase_dir, "make_video_talk_rap_multii"), ext=".mp4"
152
+ ),
153
+ "dubbing_superpowerman": get_sorted_filenames_in_dir(os.path.join(example_showcase_dir, "dubbing_superpowerman"), ext=".mp4"),
154
+ "make_image_talk_selfie": get_sorted_filenames_in_dir(os.path.join(example_showcase_dir, "make_image_talk_selfie"), ext=".mp4"),
155
+ "dubbing_coffee": get_sorted_filenames_in_dir(os.path.join(example_showcase_dir, "dubbing_coffee"), ext=".mp4"),
156
+ }
157
+
158
+
159
def update_voices(media_path):
    """Return a refreshed voice Dropdown whose choices match the gender
    inferred from *media_path* (falls back to the combined voice list)."""

    def infer_category(path):
        # Terms are matched both as-written and lower-cased, mirroring how
        # the example media files are named.
        if path:
            for term in female_terms:
                if term in path or term.lower() in path:
                    return female_key
            for term in male_terms:
                if term in path or term.lower() in path:
                    return male_key
        return unknown_key

    choices = voices[infer_category(media_path)]
    return gr.Dropdown(
        choices=choices,
        value=choices[0],
        interactive=True,
    )
179
+
180
+
181
def task_executor_fn(
    input_base_path, base_motion_expression, input_driving_audio_path, driving_text_input, driving_voice_input
):
    """Thin adapter so Gradio click handlers call the module-level executor."""
    request_args = (
        input_base_path,
        base_motion_expression,
        input_driving_audio_path,
        driving_text_input,
        driving_voice_input,
    )
    return task_executor.execute_task(*request_args)
188
+
189
+ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo_image:
190
+ with gr.Row():
191
+ # Step 1: Choose Image
192
+ with gr.Column(scale=4):
193
+ gr.Markdown("### Step 1: Choose Image")
194
+ gr.Markdown("Upload or select an example image to drive.")
195
+ with gr.Accordion(open=True, label="Base Image"):
196
+ base_image_input = gr.Image(type="filepath", sources="upload", height=media_height)
197
+ gr.Examples(
198
+ examples=[[example] for example in example_base_images[female_key]],
199
+ inputs=[base_image_input],
200
+ cache_examples=False,
201
+ label="Female",
202
+ )
203
+ gr.Examples(
204
+ examples=[[example] for example in example_base_images[male_key]],
205
+ inputs=[base_image_input],
206
+ cache_examples=False,
207
+ label="Male",
208
+ )
209
+
210
+ # Step 2: Motion and Audio/TTS
211
+ with gr.Column(scale=4):
212
+ gr.Markdown("### Step 2: Motion and Audio/TTS")
213
+ gr.Markdown("Select motion and provide audio or text for lip-sync.")
214
+ with gr.Accordion(open=True, label="Base Motion"):
215
+ base_motion_expression = gr.Radio(
216
+ choices=valid_base_motion_expressions,
217
+ label="Select base motion",
218
+ value=get_default_base_motion_expression(),
219
+ )
220
+ with gr.Tabs():
221
+ with gr.TabItem("Driving Audio: File") as tab_audio_file:
222
+ with gr.Accordion(open=True, label="Driving Audio: From File"):
223
+ driving_audio_input = gr.Audio(sources=["upload"], type="filepath")
224
+ gr.Examples(
225
+ examples=[[example] for example in example_driving_audios[female_key]],
226
+ inputs=[driving_audio_input],
227
+ cache_examples=False,
228
+ examples_per_page=18,
229
+ label="Female",
230
+ )
231
+ gr.Examples(
232
+ examples=[[example] for example in example_driving_audios[male_key]],
233
+ inputs=[driving_audio_input],
234
+ cache_examples=False,
235
+ examples_per_page=18,
236
+ label="Male",
237
+ )
238
+
239
+ with gr.TabItem("Driving Audio: TTS") as tab_audio_tts:
240
+ with gr.Accordion(open=True, label="Driving Audio: From Text"):
241
+ driving_input_voice = gr.Dropdown(
242
+ choices=voices[unknown_key], value=voices[unknown_key][0], label="Voice"
243
+ )
244
+ driving_text_input = gr.Textbox(
245
+ label="Input Text (300 characters max)",
246
+ lines=2,
247
+ )
248
+ for group in audio_text_groups:
249
+ gr.Examples(
250
+ examples=[[example] for example in example_driving_audio_texts[group]],
251
+ inputs=[driving_text_input],
252
+ cache_examples=False,
253
+ label=group,
254
+ )
255
+
256
+ # Step 3: Result
257
+ with gr.Column(scale=4):
258
+ gr.Markdown("### Step 3: Result")
259
+ gr.Markdown("Generate and view the output video.")
260
+ process_button_animation = gr.Button("🌟 Generate", variant="primary")
261
+ output_video_i2v = gr.Video(autoplay=True, label="The Output Video", height=media_height)
262
+ message = gr.Textbox(label="Info")
263
+ process_button_reset = gr.ClearButton(
264
+ [
265
+ base_image_input,
266
+ driving_audio_input,
267
+ driving_text_input,
268
+ driving_input_voice,
269
+ output_video_i2v,
270
+ ],
271
+ value="🧹 Clear",
272
+ )
273
+
274
+ base_image_input.change(fn=update_voices, inputs=[base_image_input], outputs=[driving_input_voice])
275
+
276
+ # binding functions for buttons
277
+ process_button_animation.click(
278
+ fn=task_executor_fn,
279
+ inputs=[
280
+ base_image_input,
281
+ base_motion_expression,
282
+ driving_audio_input,
283
+ driving_text_input,
284
+ driving_input_voice,
285
+ ],
286
+ outputs=[output_video_i2v, output_video_i2v, message],
287
+ show_progress=True,
288
+ )
289
+
290
+ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo_video:
291
+ with gr.Row():
292
+ # Step 1: Choose Video
293
+ with gr.Column(scale=4):
294
+ gr.Markdown("### Step 1: Choose Video")
295
+ gr.Markdown("Upload or select an example video to drive.")
296
+ with gr.Accordion(open=True, label="Base Video"):
297
+ base_video_input = gr.Video(sources="upload", height=media_height, interactive=True)
298
+ gr.Examples(
299
+ examples=[[example] for example in example_source_videos[female_key]],
300
+ inputs=[base_video_input],
301
+ cache_examples=False,
302
+ label="Female",
303
+ )
304
+ gr.Examples(
305
+ examples=[[example] for example in example_source_videos[male_key]],
306
+ inputs=[base_video_input],
307
+ cache_examples=False,
308
+ label="Male",
309
+ )
310
+
311
+ # Step 2: Audio/TTS
312
+ with gr.Column(scale=4):
313
+ gr.Markdown("### Step 2: Audio/TTS")
314
+ gr.Markdown("Provide audio or text for lip-sync.")
315
+ with gr.Tabs():
316
+ with gr.TabItem("Driving Audio: File") as tab_audio_file:
317
+ with gr.Accordion(open=True, label="Driving Audio: From File"):
318
+ driving_audio_input = gr.Audio(sources=["upload"], type="filepath")
319
+ gr.Examples(
320
+ examples=[[example] for example in example_driving_audios[female_key]],
321
+ inputs=[driving_audio_input],
322
+ cache_examples=False,
323
+ examples_per_page=18,
324
+ label="Female",
325
+ )
326
+ gr.Examples(
327
+ examples=[[example] for example in example_driving_audios[male_key]],
328
+ inputs=[driving_audio_input],
329
+ cache_examples=False,
330
+ examples_per_page=18,
331
+ label="Male",
332
+ )
333
+ with gr.TabItem("Driving Audio: TTS") as tab_audio_tts:
334
+ with gr.Accordion(open=True, label="Driving Audio: From Text"):
335
+ driving_input_voice = gr.Dropdown(
336
+ choices=voices[unknown_key], value=voices[unknown_key][0], label="Voice"
337
+ )
338
+ driving_text_input = gr.Textbox(
339
+ label="Input Text (300 characters max)",
340
+ lines=2,
341
+ )
342
+ for group in audio_text_groups:
343
+ gr.Examples(
344
+ examples=[[example] for example in example_driving_audio_texts[group]],
345
+ inputs=[driving_text_input],
346
+ cache_examples=False,
347
+ label=group,
348
+ )
349
+ # Step 3: Result
350
+ with gr.Column(scale=4):
351
+ gr.Markdown("### Step 3: Result")
352
+ gr.Markdown("Generate and view the output video.")
353
+ process_button_animation = gr.Button("🌟 Generate", variant="primary")
354
+ output_video_i2v = gr.Video(autoplay=True, label="The Output Video", height=media_height)
355
+ message = gr.Textbox(label="Info")
356
+ process_button_reset = gr.ClearButton(
357
+ [base_video_input, driving_audio_input, driving_text_input, driving_input_voice, output_video_i2v],
358
+ value="🧹 Clear",
359
+ )
360
+
361
+ base_video_input.change(fn=update_voices, inputs=[base_video_input], outputs=[driving_input_voice])
362
+
363
+ # binding functions for buttons
364
+ base_motion_expression = gr.Radio(value=None, visible=False)
365
+ process_button_animation.click(
366
+ fn=task_executor_fn,
367
+ inputs=[
368
+ base_video_input,
369
+ base_motion_expression,
370
+ driving_audio_input,
371
+ driving_text_input,
372
+ driving_input_voice,
373
+ ],
374
+ outputs=[output_video_i2v, output_video_i2v, message],
375
+ show_progress=True,
376
+ )
377
+
378
+ with gr.Blocks() as showcase_examples:
379
+ gr.Markdown("# Make Image Talk")
380
+ with gr.Row():
381
+ with gr.Column(scale=7):
382
+ for path in examples_showcase["make_image_talk_multilingual"]:
383
+ gr.Video(value=path, label=os.path.basename(path), height=300)
384
+ with gr.Column(scale=3):
385
+ for path in examples_showcase["make_image_talk_cartoon"]:
386
+ gr.Video(value=path, label=os.path.basename(path), height=616)
387
+ with gr.Row():
388
+ with gr.Column(scale=7):
389
+ for path in examples_showcase["make_image_talk_diff_angles"]:
390
+ gr.Video(value=path, label=os.path.basename(path), height=350)
391
+ with gr.Column(scale=3):
392
+ for path in examples_showcase["make_image_talk_hb"]:
393
+ gr.Video(value=path, label=os.path.basename(path), height=350)
394
+ with gr.Row():
395
+ for path in examples_showcase['make_image_talk_selfie']:
396
+ gr.Video(value=path, label=os.path.basename(path), height=430)
397
+
398
+ gr.Markdown("# Make Video Talk")
399
+ with gr.Row():
400
+ with gr.Column(scale=7):
401
+ for path in examples_showcase["make_video_talk_multilingual"]:
402
+ gr.Video(value=path, label=os.path.basename(path), height=300)
403
+ with gr.Column(scale=3):
404
+ for path in examples_showcase["make_video_talk_corp_msg"]:
405
+ gr.Video(value=path, label=os.path.basename(path), height=616)
406
+ with gr.Row():
407
+ for path in examples_showcase["make_video_talk_rap_multii"]:
408
+ gr.Video(value=path, label=os.path.basename(path), height=500)
409
+
410
+ gr.Markdown("# Dubbing")
411
+ with gr.Row():
412
+ for path in examples_showcase["dubbing_superpowerman"]:
413
+ gr.Video(value=path, label=os.path.basename(path), height=320)
414
+ with gr.Row():
415
+ for path in examples_showcase["dubbing_coffee"]:
416
+ gr.Video(value=path, label=os.path.basename(path), height=440)
417
+
418
+ with gr.Blocks(analytics_enabled=False, css="footer{display:none !important}", title="SUTRA Avatar v2") as demo:
419
+ gr.Markdown(
420
+ """
421
+ ## <img src="https://playground.two.ai/sutra.svg" height="20"/>
422
+ """
423
+ )
424
+ title = "# 🌟 SUTRA Avatar v2 🌟\n## Drive Image or Video with LipSync from Audio or Text"
425
+ gr.Markdown(title)
426
+ gr.Markdown(description)
427
+
428
+ gr.TabbedInterface(
429
+ interface_list=[demo_image, demo_video, showcase_examples],
430
+ tab_names=["Drive Image", "Drive Video", "Showcase Examples"],
431
+ )
432
+
433
+ if __name__ == "__main__":
434
+ parser = argparse.ArgumentParser(description="SUTRA AVATAR CLIENT")
435
+ args = parser.parse_args()
436
+ task_executor = CloudTaskExecutor()
437
+
438
+ demo.queue(default_concurrency_limit=10).launch(
439
+ server_name="0.0.0.0",
440
+ allowed_paths=["/"],
441
+ )
base_task_executor.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import re
4
+ import shutil
5
+ import time
6
+ from abc import ABC, abstractmethod
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+
10
+ import gradio as gr
11
+
12
+ from elevenlabs_helper import ElevenLabsHelper
13
+
14
+ # ---
15
+ talk_key = "talk"
16
+
17
+ # ---
18
+
19
+ valid_image_exts = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp")
20
+
21
+
22
def is_image(file_path):
    """Report whether *file_path* ends in a known image extension (case-insensitive)."""
    lowered = file_path.lower()
    return any(lowered.endswith(ext) for ext in valid_image_exts)
24
+
25
+
26
def get_formatted_datetime_name() -> str:
    """Current local time rendered as ``dYYMMDD-tHHMMSS`` (e.g. ``d240131-t235959``)."""
    return datetime.now().strftime("d%y%m%d-t%H%M%S")
29
+
30
+
31
def get_name_ext(filepath):
    """Split *filepath* into (stem, extension) after resolving to an absolute path."""
    tail = os.path.basename(os.path.abspath(filepath))
    return os.path.splitext(tail)
36
+
37
+
38
def sanitize_string(string):
    """Keep only ASCII letters/digits from *string*, truncated to 15 characters."""
    max_len = 15
    return re.sub(r"[^A-Za-z0-9]", "", string)[:max_len]
42
+
43
+
44
def get_output_video_name(
    input_base_path, input_driving_path, base_motion_expression, input_driving_audio_path, tag=""
):
    """Compose the output video's base name from its inputs.

    Shape: ``<tag>--b-<base>[--d-<driving>][--a-<audio>]``.  Each segment is
    the sanitized stem of the corresponding file; for an image base with no
    driving video, the motion label stands in for the driving segment.
    *tag* defaults to a timestamp-derived name.
    """
    if not tag:
        tag = get_formatted_datetime_name()

    def stem_of(path):
        name, _ = get_name_ext(path)
        return sanitize_string(name)

    driving_label = ""
    if input_driving_path:
        driving_label = stem_of(input_driving_path)
    elif base_motion_expression and is_image(input_base_path):
        driving_label = base_motion_expression

    audio_label = stem_of(input_driving_audio_path) if input_driving_audio_path else ""

    name = f"{tag}--b-{stem_of(input_base_path)}"
    if driving_label:
        name += f"--d-{driving_label}"
    if audio_label:
        name += f"--a-{audio_label}"
    return name
73
+
74
+
75
def generate_random_integer(num_digits):
    """Return a uniform random integer in ``[0, 10**num_digits - 1]``.

    Args:
        num_digits: Maximum number of decimal digits in the result.

    BUG FIX: the original reseeded the *global* ``random`` module with the
    current millisecond on every call, so two calls within the same
    millisecond returned identical "random" values, and the process-wide RNG
    state was clobbered for every other user of ``random``.  An OS-seeded
    ``SystemRandom`` instance avoids both problems.
    """
    upper_bound = (10**num_digits) - 1
    return random.SystemRandom().randint(0, upper_bound)
81
+
82
+
83
def get_unique_name(maxd=4, delim="-"):
    """Build a quasi-unique name from PID, nanosecond clock, and a random int.

    Each component is clipped (or zero-padded, for the random part) to *maxd*
    digits, then the three pieces are joined with *delim*.
    """
    components = [
        str(os.getpid())[-maxd:],
        str(time.time_ns())[-maxd:],
        str(generate_random_integer(maxd)).zfill(maxd),
    ]
    return delim.join(components)
93
+
94
+
95
def mkdir_p(path: str) -> None:
    """Create *path* (and any missing parents); no-op if it already exists.

    BUG FIX: the original did ``if not exists(): mkdir()``, which is racy —
    another process could create the directory between the check and the
    mkdir, raising FileExistsError.  ``exist_ok=True`` makes the operation
    atomic and idempotent.
    """
    Path(path).mkdir(parents=True, exist_ok=True)
98
+
99
+
100
+ # ---
101
+
102
+
103
class BaseTaskExecutor(ABC):
    """Shared driver for avatar-generation requests.

    Prepares inputs (optional TTS synthesis, scratch directory, output
    naming), delegates the actual rendering to a subclass's ``generate``,
    and cleans up the scratch directory afterwards.
    """

    def __init__(self):
        # Per-request scratch space (synthesized audio etc.) lives here.
        self.tmp_dir = "/tmp/gradio"

    def execute_task(
        self, input_base_path, base_motion_expression, input_driving_audio_path, driving_text_input, driving_voice_input
    ):
        """Run one request; return ``(video_path, visibility_update, messages)``.

        Raises:
            gr.Error: If no base image/video was provided.
        """
        tag = get_unique_name()
        output_dir = os.path.join(self.tmp_dir, tag)
        mkdir_p(output_dir)

        # Which audio sources are actually usable?
        do_dafile = input_driving_audio_path is not None and os.path.exists(input_driving_audio_path)
        do_datts = driving_text_input and driving_voice_input
        do_talk = do_dafile or do_datts

        if base_motion_expression:
            # An expressive (non-talking) base motion overrides any audio.
            if talk_key not in base_motion_expression and do_talk:
                gr.Warning(
                    f"Ignoring Driving Audio since expressive Base Motion selected: {base_motion_expression}")
                do_dafile = False
                do_datts = False
                do_talk = False

            if talk_key in base_motion_expression and not do_talk:
                gr.Warning("Selected talking Base Motion but no Driving Audio")
        else:
            base_motion_expression = ""

        if do_datts:
            # TTS wins when both an audio file and TTS text were supplied.
            if do_dafile:
                gr.Warning("Ignoring Audio File input since TTS is selected.\nClear the undesired input if this is not intended.")
            output_audio_file = f"{output_dir}/{tag}.mp3"
            ElevenLabsHelper.generate_voice(driving_text_input, driving_voice_input, output_audio_file)
            input_driving_audio_path = output_audio_file

        if not do_talk:
            input_driving_audio_path = ""

        if input_base_path is not None and os.path.exists(input_base_path):
            input_driving_path = ""
            request_id = get_unique_name(maxd=8, delim="")
            output_video_path = os.path.join(
                self.tmp_dir,
                get_output_video_name(
                    input_base_path, input_driving_path, base_motion_expression, input_driving_audio_path
                )
                + ".mp4",
            )
            result, output_video_path = self.generate(
                input_base_path,
                input_driving_path,
                base_motion_expression,
                input_driving_audio_path,
                output_video_path,
                request_id,
            )
            success = result["success"]
            messages = result["messages"]

            self.clean(output_dir)

            if success:
                return output_video_path, gr.update(visible=True), messages
            gr.Info("Task could not be completed", duration=4)
            return None, gr.update(visible=False), f"ERROR\n\n{messages}"

        self.clean(output_dir)
        raise gr.Error("No source selected!", duration=6)

    @abstractmethod
    def generate(self):
        """Produce the output video; implemented by concrete executors."""

    def clean(self, output_dir):
        """Remove the per-request scratch directory if it exists."""
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
cloud_task_executor.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import ntpath
4
+ import os
5
+ import time
6
+
7
+ import gradio as gr
8
+ import requests
9
+ from google.cloud import storage
10
+
11
+ from base_task_executor import BaseTaskExecutor
12
+
13
+ # ---
14
+ enc = "utf-8"
15
+
16
+
17
def decode(string):
    """Decode a base64-wrapped UTF-8 JSON document into a Python object."""
    raw = base64.b64decode(string.encode("utf-8"))
    return json.loads(raw.decode("utf-8"))
19
+
20
+
21
def get_storage_client_from_env():
    """Build a GCS client from the base64-encoded service account in $GCP_API_KEY."""
    service_account_info = decode(os.environ["GCP_API_KEY"])
    return storage.Client.from_service_account_info(service_account_info)
24
+
25
+
26
def get_name_ext(filepath):
    """Return (stem, extension) of *filepath*'s final path component."""
    absolute = os.path.abspath(filepath)
    return os.path.splitext(os.path.basename(absolute))
31
+
32
+
33
def make_remote_media_path(request_id, media_path):
    """Derive the remote (GCS object) path for *media_path* from a request id.

    The id is split into source (first 3 chars), slot (next 3), and the
    remaining suffix; the object keeps the local file's basename.

    Args:
        request_id: Identifier longer than 6 characters.
        media_path: Existing local file to be uploaded.

    Raises:
        ValueError: If ``request_id`` is 6 characters or fewer.
        FileNotFoundError: If ``media_path`` does not exist.

    BUG FIX: input validation used ``assert``, which is stripped under
    ``python -O``; explicit exceptions keep the checks in optimized runs.
    """
    if len(request_id) <= 6:
        raise ValueError(f"request_id too short (need > 6 chars): {request_id!r}")
    if not os.path.exists(media_path):
        raise FileNotFoundError(media_path)
    src_id = request_id[:3]
    slot_id = request_id[3:6]
    request_suffix = request_id[6:]
    # basename(abspath(...)) == the original name + ext recombination.
    filename = os.path.basename(os.path.abspath(media_path))
    return os.path.join(src_id, slot_id, request_suffix, filename)
41
+
42
+
43
def copy_file_to_gcloud(bucket, local_file_path, remote_file_path):
    """Upload one local file into *bucket* at *remote_file_path*."""
    bucket.blob(remote_file_path).upload_from_filename(local_file_path)
46
+
47
def copy_to_gcloud(storage_client, local_media_path, bucket_name, remote_media_path):
    """Resolve *bucket_name* and upload the local media file into it."""
    target_bucket = storage_client.get_bucket(bucket_name)
    copy_file_to_gcloud(target_bucket, local_media_path, remote_media_path)
50
+
51
+
52
+ # ---
53
+
54
+
55
class CloudTaskExecutor(BaseTaskExecutor):
    """Executor that uploads inputs to GCS and drives a remote task API.

    Endpoint, API key, and bucket come from the environment:
    SUTRA_AVATAR_BASE_URL, SUTRA_AVATAR_API_KEY, SUTRA_AVATAR_BUCKET_NAME.
    """

    def __init__(self):
        super().__init__()
        self.base_url = os.getenv("SUTRA_AVATAR_BASE_URL")
        self.headers = {"Authorization": f'{os.getenv("SUTRA_AVATAR_API_KEY")}', "Content-Type": "application/json"}
        self.bucket_name = os.getenv("SUTRA_AVATAR_BUCKET_NAME")
        self.storage_client = get_storage_client_from_env()

    def submit_task(self, submit_request):
        """POST *submit_request* to /task/submit; return the JSON reply."""
        response = requests.post(f"{self.base_url}/task/submit", json=submit_request, headers=self.headers)
        if response.status_code == 200:
            return response.json()
        response.raise_for_status()

    def get_task_status(self, request_id):
        """GET /task/status for *request_id*; return the JSON reply."""
        response = requests.get(f"{self.base_url}/task/status", params={"rid": request_id}, headers=self.headers)
        if response.status_code == 200:
            return response.json()
        response.raise_for_status()

    def generate(
        self,
        input_base_path,
        input_driving_path,
        base_motion_expression,
        input_driving_audio_path,
        output_video_path,
        request_id,
    ):
        """Upload inputs, submit the task, and poll until completion or timeout.

        Returns:
            (result, output_video_path) where result holds "success" and
            "messages"; on success output_video_path is the remote video URL.
        """
        # Upload whichever input files were provided.
        for local_path in (input_base_path, input_driving_audio_path):
            if local_path:
                remote_path = make_remote_media_path(request_id, local_path)
                copy_to_gcloud(self.storage_client, local_path, self.bucket_name, remote_path)

        submit_request = {
            "requestId": request_id,
            "input_base_path": ntpath.basename(input_base_path),
            "input_driving_path": "",
            "base_motion_expression": base_motion_expression,
            "input_driving_audio_path": ntpath.basename(input_driving_audio_path),
            "output_video_path": ntpath.basename(output_video_path),
        }
        submit_reply = self.submit_task(submit_request)
        estimated_wait = submit_reply.get("estimatedWaitSeconds", "unknown")

        completion_statuses = {"Succeeded", "Cancelled", "Failed", "NotFound"}
        # Base ceiling, extended by the server's wait estimate when numeric.
        timeout = 240
        if isinstance(estimated_wait, int):
            timeout += estimated_wait
        start_time = time.time()

        result = {"messages": ''}
        while True:
            status_reply = self.get_task_status(request_id)
            if status_reply["taskStatus"] in completion_statuses:
                break
            if time.time() - start_time > timeout:
                msg = "The task did not complete within the timeout period.\n The server is very busy serving other requests.\n Please try again."
                result["success"] = False
                result["messages"] = msg
                # NOTE(review): gr.Error is instantiated but not raised here,
                # so nothing is surfaced to the UI — presumably intentional
                # best-effort on timeout; confirm.
                gr.Error(msg)
                break
            time.sleep(3)

        task_status = status_reply["taskStatus"]
        if task_status == "Succeeded":
            pipe_reply = status_reply["pipeReply"]
            result["success"] = pipe_reply["status"] == "success"
            result["messages"] = pipe_reply["messages"]
            output_video_path = status_reply["videoURL"]
        else:
            extra = ""
            if "pipeReply" in status_reply:
                extra = status_reply["pipeReply"]["messages"]
            result["success"] = False
            result["messages"] += extra
        return result, output_video_path
data/input_audio/gradio/female/en-BeesWingsBeat-Shelby.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2a85a13e25fb823143e26a39ce6de823861199b90784db4461a243d01f87201
3
+ size 55588
data/input_audio/gradio/female/en-EnhanceEfficiency-Shelby.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35135724f58a72574c9f92e5bcdec1c41eac7f02480fc306b648263f0750a742
3
+ size 60604
data/input_audio/gradio/female/en-The2026WorldCup-Shelby.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c4054806558c0f2b26313a5b352b042fdc7dba0c90eac36e9c0c667dd00bcf3
3
+ size 71053
data/input_audio/gradio/female/hi-BeesWingsBeat-Matilda.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:981852faccc81eccf82effc8ad3a2bef134c447c038ec15c4c7ff418c1a40c25
3
+ size 57678
data/input_audio/gradio/female/hi-EnhanceEfficiency-Matilda.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:568d0dd0fad0648e711fa50e0c048cad18df52e03a87503ff382379686acf89b
3
+ size 48065
data/input_audio/gradio/female/hi-The2026WorldCup-Matilda.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a304d592f4d2b10a91f7b82b25416813ca891b50e64fb513aa7f3cf1b8f0cd7c
3
+ size 53498
data/input_audio/gradio/female/ko-BeesWingsBeat-Jinju.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e97b106f1757b8f64cecb33ae9265eaba0dfa5a28bb6f27d1f42534937f203
3
+ size 47229
data/input_audio/gradio/female/ko-EnhanceEfficiency-Jinju.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34144828e9499c22fa5d7be6a621aadea5f0a25d68dca04a6ad3b65f01dfa36d
3
+ size 48065
data/input_audio/gradio/female/ko-The2026WorldCup-Jinju.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60f6dc9a567be17f2edc9d4fa5e877a4025e7acabdc4260014612b420f7b2981
3
+ size 57678
data/input_audio/gradio/male/en-BeesWingsBeat-Marcus.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b677ad256f0d28d1c9c9afabb347d7b1520aadd1b0e19ca09665fe3b9a7adfed
3
+ size 46811
data/input_audio/gradio/male/en-EnhanceEfficiency-Marcus.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:770cce3bbfca0913ceb8651584d6515c8f271bffb45d11e0f76ecf96af19e00a
3
+ size 40542
data/input_audio/gradio/male/en-The2026WorldCup-Marcus.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86f34c9f42944b8a76cc727c06f28556630d94a82304b37890919cb64d8cab51
3
+ size 57260
data/input_audio/gradio/male/hi-BeesWingsBeat-Liam.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f767e72ca739f8e3ba3edea24f5f9b533bfdbef37c60db02125dd1c18d54a1ef
3
+ size 64365
data/input_audio/gradio/male/hi-EnhanceEfficiency-Liam.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcdb3e0776e8aa60778d97dc9a73beaa81b6b94a2b31cf4e34437fdc12233425
3
+ size 50991
data/input_audio/gradio/male/hi-The2026WorldCup-Liam.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f292ae2e165b6fb713807888ab604848bf02f162f1621d47cd06bfc1926dd7f
3
+ size 54752
data/input_audio/gradio/male/ko-BeesWingsBeat-Noah.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bdcbdf30de7b6fbadd04099c08e47812311aeb1fcc5bb2c87ac4d92ab5d9a90
3
+ size 47229
data/input_audio/gradio/male/ko-EnhanceEfficiency-Noah.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be33748c1b19c74abb6f2daaa343d4c5c2c5c8c00a7a03d2fbc20ca8e08ef9a6
3
+ size 44303
data/input_audio/gradio/male/ko-The2026WorldCup-Noah.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4ea9b5d46d6e419b59b875b0b84203170736a394e4eb676e7da70af8261d64
3
+ size 58514
data/input_image_bases/female/01-Female-American_608.jpg ADDED

Git LFS Details

  • SHA256: b8f78b7e0c7e81ed5aa6d94d6f7a4197921363ec1e35e69c1d38b9095be8469c
  • Pointer size: 131 Bytes
  • Size of remote file: 228 kB
data/input_image_bases/female/02-Female-Indian01_608.jpg ADDED

Git LFS Details

  • SHA256: 8743f2c7b9c5f09bacd5aca74ae983a2c83a9501e1af0c5b4765c33f80286b51
  • Pointer size: 131 Bytes
  • Size of remote file: 211 kB
data/input_image_bases/female/03-Female-Korean_608.jpg ADDED

Git LFS Details

  • SHA256: b4b77e5db173fd080841145a82b2937fd5365f0ff1563762aed43789cbb865da
  • Pointer size: 131 Bytes
  • Size of remote file: 214 kB
data/input_image_bases/female/04-Female-Indian02_608.jpg ADDED

Git LFS Details

  • SHA256: 0db7c64c33a13797a5c72b427df2803c45560a9a7cc606897a91f1b4a81aee69
  • Pointer size: 131 Bytes
  • Size of remote file: 245 kB
data/input_image_bases/female/05-Female-European_608.jpg ADDED

Git LFS Details

  • SHA256: 736a1a4f72f8c221bdae6a9cc438e1bb6058892353c8611d2cc4731eba2bfa0d
  • Pointer size: 131 Bytes
  • Size of remote file: 250 kB
data/input_image_bases/male/01-Male-Indian_608.jpg ADDED

Git LFS Details

  • SHA256: 816d36bbb50acbacebf74d0e0c9f1a9fe5b39c37d6f40c612a7b67dc02ffe772
  • Pointer size: 131 Bytes
  • Size of remote file: 214 kB
data/input_image_bases/male/02-Male-Korean_608.jpg ADDED

Git LFS Details

  • SHA256: 0178b68fb104f30efb198ee96ac4ba41dbedf516fed306c73fd9548d68adb4fd
  • Pointer size: 131 Bytes
  • Size of remote file: 225 kB
data/input_image_bases/male/03-Male-European_608.jpg ADDED

Git LFS Details

  • SHA256: eae049f326bdaa5d966285d2cbd8429dcb7c48ab91a55126b38781be65673b98
  • Pointer size: 131 Bytes
  • Size of remote file: 272 kB
data/input_image_bases/male/04-Male-American_608.jpg ADDED

Git LFS Details

  • SHA256: 5a2d59c6418c2bb8490265d0fd261f2c1ec0e50e09fbf61abaef2e57ef870b8d
  • Pointer size: 131 Bytes
  • Size of remote file: 242 kB
data/input_image_bases/male/05-Male-AfricanAmerican_608.jpg ADDED

Git LFS Details

  • SHA256: 9b84bc585d8ea0d6303bac4b82ef587df6c1bf03c2e445474554c4c4abbc4bc4
  • Pointer size: 131 Bytes
  • Size of remote file: 205 kB
data/input_video_bases/female/01-Female-Korean_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ecf7828e7d0f421767d190b3555868728b184edac1f4a0201820f1c58865d7c
3
+ size 2000776
data/input_video_bases/female/02-Female-Latina_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6884cd58b987f02443d83b3faae37951aa33a689245c3bf65725f609c6303789
3
+ size 2666194
data/input_video_bases/female/03-Female-European_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbde154264db6fbcb94e3c93c529b365f67e667473cc8a1445e0e9223ce6ea8b
3
+ size 1625368
data/input_video_bases/female/04-Female-Indian_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a3a358644c023f7cde032e5570d9b39b615b594d8ab6747456a2c60ac9a1f1c
3
+ size 1529791
data/input_video_bases/female/05-Female-American_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a91366a511a6b27f15edca2b5b6428e1ea3781971c9ac4202a34c49c0cef89
3
+ size 1903512
data/input_video_bases/male/01-Male-Japanese_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9325107bacf0442932b74f88fc861a008fbbf4770f32074a0f818cc7f69c1759
3
+ size 1770959
data/input_video_bases/male/02-Male-European_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0eb1e61a0b6f22a4fcfd3acb90c5e661396678fcde7eca3edd394f1223483ea
3
+ size 1693659
data/input_video_bases/male/03-Male-American02_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c9427293f6b721ac180f596b71ea4df1e5a5f5d3938f7ac9ac16df2007562f
3
+ size 1927639
data/input_video_bases/male/04-Male-Indian_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e12f8f6c70d602ad8c8f422ffd703a6c012b453d9902245b82b4ae0c051397d6
3
+ size 1352685
data/input_video_bases/male/05-Male-American_608.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:448f993473f7a8291f8591856e15701c7e9bb373ddbf9e9c8a773d69b84601ac
3
+ size 1854230
data/showcase_examples/archive/01 Multilingual Female_720.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c67441bab5596482bfcb40c725c0829fb7b4df1a5642e43661b6553b20cefed2
3
+ size 17771532
data/showcase_examples/archive/02 Multilingual Male_720.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82605475898eddb08165ec3429bb933e94a765d23c8c7a4ef1ecfa70363a4638
3
+ size 13215459
data/showcase_examples/archive/02 Multilingual Male_720_IM.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63481b053c30f05600791361914e9d2f7a17d003da56d1776f319622d8ec0a3
3
+ size 17479793
data/showcase_examples/archive/03 Corporate Message_720.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:917db888f55ada94ee47b2f05a0ed2274f71d750b25f3c11ae5e9bc4b86a663c
3
+ size 2930433
data/showcase_examples/archive/04 Multi-Identities: Multilingual_720.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:414fd98b0816cbd2834353b87dcb3e3f41e3c47423c0b50040a79461c225f500
3
+ size 5313472
data/showcase_examples/archive/05 Multi-Identities: Rap_720.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efcc23f689bc5067a30ab46efaa6d546c46cf422427dbb058fde6b8be066fbd3
3
+ size 2556681