Spaces:

DHEIVER
/

gradio-sam-video-segmentation

Sleeping

App Files Community

DHEIVER committed on Dec 3, 2024

Commit

ded1ee1

verified ·

1 Parent(s): ceeee63

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -18

app.py CHANGED Viewed

@@ -20,20 +20,16 @@ def download_sam_model():
         print("Download complete!")
     return checkpoint_path
-def process_video_sam(video_path):
-    # Download model if needed
     checkpoint_path = download_sam_model()
-    # Initialize SAM
     DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    MODEL_TYPE = "vit_h"
-    sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint_path)
     sam.to(device=DEVICE)
     predictor = SamPredictor(sam)
-    # Process video
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
@@ -44,32 +40,55 @@ def process_video_sam(video_path):
                          fps,
                          (width, height))
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
         predictor.set_image(frame)
-        masks = predictor.generate()
         annotated_frame = frame.copy()
-        for mask in masks[0]:
-            annotated_frame[mask.mask] = annotated_frame[mask.mask] * 0.5 + np.array([0, 255, 0]) * 0.5
         out.write(annotated_frame)
     cap.release()
     out.release()
     return output_path
-iface = gr.Interface(
-    fn=process_video_sam,
-    inputs=gr.Video(label="Upload Video"),
-    outputs=gr.Video(label="Segmented Video"),
-    title="Video Segmentation with SAM",
-    description="Upload a video to segment objects using Segment Anything Model"
-)
 if __name__ == "__main__":
-    iface.launch()

         print("Download complete!")
     return checkpoint_path
+def process_video_sam(video_path, progress=gr.Progress()):
     checkpoint_path = download_sam_model()
     DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    sam = sam_model_registry["vit_h"](checkpoint=checkpoint_path)
     sam.to(device=DEVICE)
     predictor = SamPredictor(sam)
     cap = cv2.VideoCapture(video_path)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                          fps,
                          (width, height))
+    frame_count = 0
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
         predictor.set_image(frame)
+        # Gerar pontos de prompt automáticos
+        input_point = np.array([[width//2, height//2]])
+        input_label = np.array([1])
+        masks, scores, logits = predictor.predict(
+            point_coords=input_point,
+            point_labels=input_label,
+            multimask_output=True
+        )
         annotated_frame = frame.copy()
+        for mask in masks:
+            annotated_frame[mask] = annotated_frame[mask] * 0.5 + np.array([0, 255, 0]) * 0.5
         out.write(annotated_frame)
+        frame_count += 1
+        progress(frame_count/total_frames, desc="Processing video...")
     cap.release()
     out.release()
     return output_path
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Video Segmentation with SAM")
+    gr.Markdown("Upload a video to segment objects using Segment Anything Model")
+    with gr.Row():
+        with gr.Column():
+            input_video = gr.Video(label="Input Video")
+            process_btn = gr.Button("Process Video", variant="primary")
+        with gr.Column():
+            output_video = gr.Video(label="Segmented Video")
+    process_btn.click(
+        fn=process_video_sam,
+        inputs=input_video,
+        outputs=output_video,
+        api_name="segment_video"
+    )
 if __name__ == "__main__":
+    demo.launch()