# Spaces: Running on Zero
# NOTE(review): on ZeroGPU Spaces, `spaces` is expected to be imported before
# any package that initialises CUDA, so it is kept first — confirm against the
# ZeroGPU documentation before reordering.
import spaces
import gradio as gr

from struct_caption import StructCaptioner
from fusion_caption import FusionCaptioner
# Instantiate both captioners once at import time so every UI callback reuses
# the same objects. The string arguments look like Hugging Face model ids —
# presumably resolved by the constructors; verify in struct_caption.py and
# fusion_caption.py.
struct_captioner = StructCaptioner("Skywork/SkyCaptioner-V1")
fusion_captioner = FusionCaptioner("Qwen/Qwen3-8B")

with gr.Blocks() as demo:
    gr.Markdown(
        """
        <h1 style="text-align: center; font-size: 2em;">SkyCaptioner-V1</h1>
        """,
        elem_id="header",
    )

    # Row 1: video upload on the left, structured (JSON) caption on the right.
    with gr.Row():
        # Gradio expects integer `scale` values and warns on floats; 1 vs. 2
        # keeps the original 0.5 : 1 width ratio between the two columns.
        with gr.Column(scale=1):
            video_input = gr.Video(
                label="Upload Video",
                interactive=True,
                format="mp4",
            )
            btn_struct = gr.Button("Generate Struct Caption")
        with gr.Column(scale=2):
            struct_caption_output = gr.Code(
                label="Struct Caption",
                language="json",
                lines=25,
                interactive=False,
            )

    # Row 2: task selector on the left, fused caption on the right.
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                task_input = gr.Radio(
                    label="Task Type",
                    choices=["t2v", "i2v"],
                    value="t2v",
                    interactive=True,
                )
            btn_fusion = gr.Button("Generate Fusion Caption")
        with gr.Column(scale=2):
            fusion_caption_output = gr.Textbox(
                label="Fusion Caption",
                value="",
                interactive=False,
            )

    def generate_struct_caption(video):
        """Run the struct captioner on the uploaded video.

        Args:
            video: Value supplied by the ``gr.Video`` input; falsy when the
                user has not uploaded anything yet.

        Returns:
            Whatever ``struct_captioner`` returns; it is displayed in the
            "Struct Caption" code panel.

        Raises:
            gr.Error: If no video has been provided, so the user sees a clear
                message instead of an opaque failure inside the captioner.
        """
        if not video:
            raise gr.Error("Please upload a video before generating a struct caption.")
        return struct_captioner(video)

    def generate_fusion_caption(struct_caption_str, task):
        """Fuse a struct caption into a final caption for the selected task.

        Args:
            struct_caption_str: Current content of the "Struct Caption" panel.
            task: Selected task type, one of the radio choices
                (``"t2v"`` or ``"i2v"``).

        Returns:
            Whatever ``fusion_captioner`` returns; it is displayed in the
            "Fusion Caption" textbox.

        Raises:
            gr.Error: If the struct caption is missing or blank, since there
                is nothing to fuse yet.
        """
        if not struct_caption_str or not struct_caption_str.strip():
            raise gr.Error("Please generate a struct caption before generating a fusion caption.")
        return fusion_captioner(struct_caption_str, task)

    # Wire the buttons to their callbacks.
    btn_struct.click(
        fn=generate_struct_caption,
        inputs=video_input,
        outputs=struct_caption_output,
    )
    btn_fusion.click(
        fn=generate_fusion_caption,
        inputs=[struct_caption_output, task_input],
        outputs=fusion_caption_output,
    )

    # Clickable sample videos that populate the upload widget.
    gr.Examples(
        examples=[
            ["./examples/1.mp4"],
            ["./examples/2.mp4"],
            ["./examples/3.mp4"],
            ["./examples/4.mp4"],
        ],
        inputs=video_input,
        label="Example Videos",
    )

demo.launch()