Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -329,7 +329,33 @@ def synthesize_video_with_audio(video_file, caption):
|
|
329 |
|
330 |
# Gradio界面
|
331 |
with gr.Blocks() as demo:
|
332 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
with gr.Row():
|
334 |
video_input = gr.Video(label="upload video")
|
335 |
caption_input = gr.Textbox(label="caption(optional)", placeholder="can be empty", lines=1)
|
|
|
329 |
|
330 |
# Gradio界面
|
331 |
with gr.Blocks() as demo:
|
332 |
+
gr.Markdown(
|
333 |
+
"""
|
334 |
+
# ThinkSound\n
|
335 |
+
ThinkSound is a unified Any2Audio generation framework with flow matching guided by Chain-of-Thought (CoT) reasoning.
|
336 |
+
|
337 |
+
Upload video and caption (optional), and get video with audio!
|
338 |
+
|
339 |
+
[Project page is here](https://huggingface.co/spaces/FunAudioLLM/ThinkSound)
|
340 |
+
[Model weights are here](https://huggingface.co/liuhuadai/ThinkSound)
|
341 |
+
|
342 |
+
## Citation
|
343 |
+
|
344 |
+
If you find our work useful, please cite our paper:
|
345 |
+
|
346 |
+
```bibtex
|
347 |
+
@misc{liu2025thinksoundchainofthoughtreasoningmultimodal,
|
348 |
+
title={ThinkSound: Chain-of-Thought Reasoning in Multimodal Large Language Models for Audio Generation and Editing},
|
349 |
+
author={Huadai Liu and Jialei Wang and Kaicheng Luo and Wen Wang and Qian Chen and Zhou Zhao and Wei Xue},
|
350 |
+
year={2025},
|
351 |
+
eprint={2506.21448},
|
352 |
+
archivePrefix={arXiv},
|
353 |
+
primaryClass={eess.AS},
|
354 |
+
url={https://arxiv.org/abs/2506.21448},
|
355 |
+
}
|
356 |
+
```
|
357 |
+
"""
|
358 |
+
)
|
359 |
with gr.Row():
|
360 |
video_input = gr.Video(label="upload video")
|
361 |
caption_input = gr.Textbox(label="caption(optional)", placeholder="can be empty", lines=1)
|