from huggingface_hub import snapshot_download
from modelscope.pipelines import pipeline
from modelscope.outputs import OutputKeys
import pathlib
import gradio as gr

# Download the model weights and prepare the model directory
model_dir = pathlib.Path('weights')
snapshot_download('damo-vilab/modelscope-damo-text-to-video-synthesis',
                  repo_type='model', local_dir=model_dir)

# Initialize the text-to-video synthesis pipeline
pipe = pipeline('text-to-video-synthesis', model_dir.as_posix())

# Define a function that takes a text prompt and generates a video
def generate_video(text_prompt):
    test_text = {'text': text_prompt}
    output_video_path = pipe(test_text)[OutputKeys.OUTPUT_VIDEO]
    return output_video_path

# Set up the Gradio interface
demo = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Enter a text prompt", placeholder="Describe the scene..."),
    outputs=gr.Video(label="Generated Video"),
    title="Text-to-Video Generator",
    description="Enter a text description, and the model will generate a video based on your input.",
)

# Launch the Gradio interface
if __name__ == "__main__":
    demo.launch()