Miquel Farre committed
Commit · e50a69b
1 Parent(s): 93c34ad
- app.py +7 -4
- video_highlight_detector.py +0 -54
app.py CHANGED

@@ -78,7 +78,8 @@ def process_video(
     add_watermark(temp_output, output_path)
 
     os.unlink(temp_output)
-
+    progress(1.0, desc="Complete!")
+
     video_description = video_description[:500] + "..." if len(video_description) > 500 else video_description
     highlight_types = highlight_types[:500] + "..." if len(highlight_types) > 500 else highlight_types
 
@@ -125,7 +126,9 @@ def create_ui(examples_path: str):
             label="Upload your video (max 20 minutes)",
             interactive=True
         )
-
+
+        gr.Progress()
+        process_btn = gr.Button("Process Video", variant="primary")
 
         status = gr.Markdown(visible=True)
 
@@ -139,7 +142,7 @@ def create_ui(examples_path: str):
         output_video = gr.Video(label="Highlight Video", visible=False)
         download_btn = gr.Button("Download Highlights", visible=False)
 
-        def on_process(video):
+        def on_process(video, progress=gr.Progress()):
             if not video:
                 return {
                     status: "Please upload a video",
@@ -150,7 +153,7 @@ def create_ui(examples_path: str):
                 }
 
             status.value = "Processing video..."
-            output_path, desc, highlights, err = process_video(video)
+            output_path, desc, highlights, err = process_video(video, progress=progress)
 
             if err:
                 return {
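For reference, a minimal sketch of the Gradio progress-reporting pattern this commit wires in: the click handler declares a gr.Progress() default so Gradio injects a live tracker, and the tracker is forwarded into the worker function, which reports fractions via progress(fraction, desc=...). The process_video below is a hypothetical stand-in for the app's own function, not the Space's implementation; component names here are illustrative.

import time
import gradio as gr

# Hypothetical stand-in for the app's process_video(); it only sleeps and
# reports progress through the injected tracker, mirroring the pattern the
# diff introduces (ending with progress(1.0, desc="Complete!")).
def process_video(video_path, progress=None):
    for i in range(4):
        time.sleep(0.1)  # placeholder for real work
        if progress is not None:
            progress((i + 1) / 4, desc=f"Step {i + 1}/4")
    if progress is not None:
        progress(1.0, desc="Complete!")
    return video_path

with gr.Blocks() as demo:
    input_video = gr.Video(label="Upload your video", interactive=True)
    process_btn = gr.Button("Process Video", variant="primary")
    output_video = gr.Video(label="Highlight Video")

    # Declaring progress=gr.Progress() makes Gradio pass a live tracker
    # into the handler; it is then threaded through to the worker.
    def on_process(video, progress=gr.Progress()):
        return process_video(video, progress=progress)

    process_btn.click(on_process, inputs=input_video, outputs=output_video)

if __name__ == "__main__":
    demo.launch()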
video_highlight_detector.py CHANGED

@@ -768,57 +768,3 @@ def load_model(
 
     return model, processor
 
-
-# def load_model(
-#     checkpoint_path: Optional[str] = None,
-#     base_model_id: str = "HuggingFaceTB/SmolVLM-2.2B-Instruct",
-#     device: str = "cuda"
-# ):
-#     """Load the model and processor."""
-#     # For demonstration, we set the target size
-#     video_target_size = 384
-
-#     processor = AutoProcessor.from_pretrained(base_model_id)
-#     # Configure the image processor
-#     processor.image_processor.size = {"longest_edge": video_target_size}
-#     processor.image_processor.do_resize = True
-#     processor.image_processor.do_image_splitting = False
-
-#     if checkpoint_path:
-#         model = SmolVLMForConditionalGeneration.from_pretrained(
-#             checkpoint_path,
-#             torch_dtype=torch.bfloat16,
-#             device_map=device
-#         )
-#     else:
-#         model = SmolVLMForConditionalGeneration.from_pretrained(
-#             base_model_id,
-#             torch_dtype=torch.bfloat16,
-#             device_map=device
-#         )
-
-#     return model, processor
-
-
-def main():
-    checkpoint_path = "/fsx/miquel/smolvlmvideo/checkpoints/final-visionUnfrozen-balanced/checkpoint-6550"
-    base_model_id = "HuggingFaceTB/SmolVLM-2.2B-Instruct"
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    model, processor = load_model(checkpoint_path, base_model_id, device)
-    detector = BatchedVideoHighlightDetector(model, processor, device=device)
-
-    if len(sys.argv) < 3:
-        print("Usage: python video_highlight_detector.py <input_video> <output_video>")
-        sys.exit(1)
-
-    video_path = sys.argv[1]
-    output_path = sys.argv[2]
-
-    # Create highlight video
-    highlight_segments = detector.create_highlight_video(video_path, output_path)
-    print(f"Created highlight video with {len(highlight_segments)} segments")
-
-
-if __name__ == "__main__":
-    main()