Simultaneous-Segmented-Depth-Prediction

Sleeping

App Files Files Community

vaishanthr committed on Jul 12, 2023

Commit

00ab2e7

1 Parent(s): f5bc491

updated code files

Browse files

Files changed (3) hide show

app.py +29 -21
point_cloud_generator.py +77 -33
utils.py +3 -2

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import numpy as np
 import os
 import torch
 import utils
 from image_segmenter import ImageSegmenter
 from monocular_depth_estimator import MonocularDepthEstimator
@@ -21,7 +22,9 @@ def process_image(image):
     image_segmentation, objects_data = img_seg.predict(image)
     depthmap, depth_colormap = depth_estimator.make_prediction(image)
     dist_image = utils.draw_depth_info(image, depthmap, objects_data)
-    return image_segmentation, depth_colormap, dist_image
 def test_process_img(image):
     image = utils.resize(image)
@@ -32,14 +35,14 @@ def test_process_img(image):
 def process_video(vid_path=None):
     vid_cap = cv2.VideoCapture(vid_path)
     while vid_cap.isOpened():
-        ret, frame = vid_cap.read()
         if ret:
             print("making predictions ....")
             frame = utils.resize(frame)
             image_segmentation, objects_data = img_seg.predict(frame)
             depthmap, depth_colormap = depth_estimator.make_prediction(frame)
             dist_image = utils.draw_depth_info(frame, depthmap, objects_data)
-            yield cv2.cvtColor(image_segmentation, cv2.COLOR_BGR2RGB), depth_colormap, dist_image
     return None
@@ -76,9 +79,9 @@ if __name__ == "__main__":
     # image_segmentation, objects_data, depthmap, depth_colormap = test_process_img(img_1)
     # final_image = utils.draw_depth_info(image_segmentation, depthmap, objects_data)
-    # objs_pcd = utils.generate_obj_pcd(depthmap, objects_data[2][3])
     # # print(objs_pcd[0][0])
-    # # display_pcd(objs_pcd, use_matplotlib=False)
     # cv2.imshow("Segmentation", image_segmentation)
     # cv2.imshow("Depth", depthmap*objects_data[2][3])
@@ -91,11 +94,9 @@ if __name__ == "__main__":
     with gr.Blocks() as my_app:
         # title
-        gr.Markdown(
-        """
-        # Object segmentation and depth estimation
-        Input an image or Video
-        """)
         # tabs
         with gr.Tab("Image"):
@@ -119,13 +120,17 @@ if __name__ == "__main__":
                     with gr.Row():
                         dist_img_output = gr.Image(height=300, label="Distance")
-                        pcd_img_output = gr.Image(height=300, label="Point Cloud")
             gr.Markdown("## Sample Images")
             gr.Examples(
-                examples=[os.path.join(os.path.dirname(__file__), "assets/images/bus.jpg")],
                 inputs=img_input,
-                outputs=[segmentation_img_output, depth_img_output],
                 fn=process_image,
                 cache_examples=True,
             )
@@ -139,7 +144,7 @@ if __name__ == "__main__":
                          "Medium - Balanced performance and accuracy",
                          "Large - Slow performance and high accuracy"],
                         label="Model Type", value="Small - Better performance and less accuracy",
-                        info="Select the inference model before running predictions!"),
                     options_checkbox_vid = gr.CheckboxGroup(["Show Boundary Box", "Show Segmentation Region", "Show Segmentation Boundary"], label="Options")
                     conf_thres_vid = gr.Slider(1, 100, value=60, label="Confidence Threshold", info="Choose the threshold above which objects should be detected")
@@ -149,33 +154,36 @@ if __name__ == "__main__":
                 with gr.Column(scale=2):
                     with gr.Row():
-                        segmentation_vid_output = gr.Image(height=400, label="Segmentation")
-                        depth_vid_output = gr.Image(height=400, label="Depth Estimation")
                     with gr.Row():
                         dist_vid_output = gr.Image(height=300, label="Distance")
-                        pcd_vid_output = gr.Image(height=300, label="Point Cloud")
             gr.Markdown("## Sample Videos")
             gr.Examples(
-                examples=[os.path.join(os.path.dirname(__file__), "assets/videos/input_video.mp4")],
                 inputs=vid_input,
                 # outputs=vid_output,
                 # fn=vid_segmenation,
             )
         # image tab logic
-        submit_btn_img.click(process_image, inputs=img_input, outputs=[segmentation_img_output, depth_img_output, dist_img_output])
         options_checkbox_img.change(update_segmentation_options, options_checkbox_img, [])
         conf_thres_img.change(update_confidence_threshold, conf_thres_img, [])
         model_type_img.change(model_selector, model_type_img, [])
         # video tab logic
         submit_btn_vid.click(process_video, inputs=vid_input, outputs=[segmentation_vid_output, depth_vid_output, dist_vid_output])
         cancel_btn.click(cancel, inputs=[], outputs=[])
         options_checkbox_vid.change(update_segmentation_options, options_checkbox_vid, [])
-        conf_thres_vid.change(update_confidence_threshold, conf_thres_vid, [])
     my_app.queue(concurrency_count=5, max_size=20).launch()

 import os
 import torch
 import utils
+import plotly.graph_objects as go
 from image_segmenter import ImageSegmenter
 from monocular_depth_estimator import MonocularDepthEstimator
     image_segmentation, objects_data = img_seg.predict(image)
     depthmap, depth_colormap = depth_estimator.make_prediction(image)
     dist_image = utils.draw_depth_info(image, depthmap, objects_data)
+    objs_pcd = utils.generate_obj_pcd(depthmap, objects_data)
+    plot_fig = display_pcd(objs_pcd)
+    return image_segmentation, depth_colormap, dist_image, plot_fig
 def test_process_img(image):
     image = utils.resize(image)
 def process_video(vid_path=None):
     vid_cap = cv2.VideoCapture(vid_path)
     while vid_cap.isOpened():
+        ret, frame = vid_cap.read()
         if ret:
             print("making predictions ....")
             frame = utils.resize(frame)
             image_segmentation, objects_data = img_seg.predict(frame)
             depthmap, depth_colormap = depth_estimator.make_prediction(frame)
             dist_image = utils.draw_depth_info(frame, depthmap, objects_data)
+            yield cv2.cvtColor(image_segmentation, cv2.COLOR_BGR2RGB), depth_colormap, cv2.cvtColor(dist_image, cv2.COLOR_BGR2RGB)
     return None
     # image_segmentation, objects_data, depthmap, depth_colormap = test_process_img(img_1)
     # final_image = utils.draw_depth_info(image_segmentation, depthmap, objects_data)
+    # objs_pcd = utils.generate_obj_pcd(depthmap, objects_data)
     # # print(objs_pcd[0][0])
+    # display_pcd(objs_pcd, use_matplotlib=True)
     # cv2.imshow("Segmentation", image_segmentation)
     # cv2.imshow("Depth", depthmap*objects_data[2][3])
     with gr.Blocks() as my_app:
         # title
+        gr.Markdown("<h1><center>Simultaneous Segmentation and Depth Estimation</center></h1>")
+        gr.Markdown("<h3><center>Created by Vaishanth</center></h3>")
+        gr.Markdown("<h3><center>This model estimates the depth of segmented objects.</center></h3>")
         # tabs
         with gr.Tab("Image"):
                     with gr.Row():
                         dist_img_output = gr.Image(height=300, label="Distance")
+                        pcd_img_output = gr.Plot(label="Point Cloud")
             gr.Markdown("## Sample Images")
             gr.Examples(
+                examples=[os.path.join(os.path.dirname(__file__), "assets/images/baggage_claim.jpg"),
+                          os.path.join(os.path.dirname(__file__), "assets/images/kitchen_2.png"),
+                          os.path.join(os.path.dirname(__file__), "assets/images/soccer.jpg"),
+                          os.path.join(os.path.dirname(__file__), "assets/images/room_2.png"),
+                          os.path.join(os.path.dirname(__file__), "assets/images/living_room.jpg")],
                 inputs=img_input,
+                outputs=[segmentation_img_output, depth_img_output, dist_img_output, pcd_img_output],
                 fn=process_image,
                 cache_examples=True,
             )
                          "Medium - Balanced performance and accuracy",
                          "Large - Slow performance and high accuracy"],
                         label="Model Type", value="Small - Better performance and less accuracy",
+                        info="Select the inference model before running predictions!")
                     options_checkbox_vid = gr.CheckboxGroup(["Show Boundary Box", "Show Segmentation Region", "Show Segmentation Boundary"], label="Options")
                     conf_thres_vid = gr.Slider(1, 100, value=60, label="Confidence Threshold", info="Choose the threshold above which objects should be detected")
                 with gr.Column(scale=2):
                     with gr.Row():
+                        segmentation_vid_output = gr.Image(height=300, label="Segmentation")
+                        depth_vid_output = gr.Image(height=300, label="Depth Estimation")
                     with gr.Row():
                         dist_vid_output = gr.Image(height=300, label="Distance")
             gr.Markdown("## Sample Videos")
             gr.Examples(
+                examples=[os.path.join(os.path.dirname(__file__), "assets/videos/input_video.mp4"),
+                          os.path.join(os.path.dirname(__file__), "assets/videos/driving.mp4"),
+                          os.path.join(os.path.dirname(__file__), "assets/videos/overpass.mp4"),
+                          os.path.join(os.path.dirname(__file__), "assets/videos/walking.mp4")],
                 inputs=vid_input,
                 # outputs=vid_output,
                 # fn=vid_segmenation,
             )
         # image tab logic
+        submit_btn_img.click(process_image, inputs=img_input, outputs=[segmentation_img_output, depth_img_output, dist_img_output, pcd_img_output])
         options_checkbox_img.change(update_segmentation_options, options_checkbox_img, [])
         conf_thres_img.change(update_confidence_threshold, conf_thres_img, [])
         model_type_img.change(model_selector, model_type_img, [])
         # video tab logic
         submit_btn_vid.click(process_video, inputs=vid_input, outputs=[segmentation_vid_output, depth_vid_output, dist_vid_output])
+        model_type_vid.change(model_selector, model_type_vid, [])
         cancel_btn.click(cancel, inputs=[], outputs=[])
         options_checkbox_vid.change(update_segmentation_options, options_checkbox_vid, [])
+        conf_thres_vid.change(update_confidence_threshold, conf_thres_vid, [])
     my_app.queue(concurrency_count=5, max_size=20).launch()

point_cloud_generator.py CHANGED Viewed

@@ -2,7 +2,7 @@ import cv2
 import numpy as np
 import matplotlib.pyplot as plt
 import open3d as o3d
@@ -70,6 +70,7 @@ class PointCloudGenerator:
     def generate_point_cloud(self, depth_img, normalize=False):
         if normalize:
             # normalizing depth image
@@ -81,49 +82,92 @@ class PointCloudGenerator:
         # convert depth to point cloud
         # point_cloud = self.conver_to_point_cloud(depth_img)
-        depth_image = o3d.geometry.Image(depth_img)
-        # Create open3d camera intrinsic object
-        intrinsic_matrix = np.array([[self.fx_depth, 0, self.cx_depth], [0, self.fy_depth, self.cy_depth], [0, 0, 1]])
-        camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
-        # camera_intrinsic.intrinsic_matrix = intrinsic_matrix
-        camera_intrinsic.set_intrinsics(depth_image.width, depth_image.height, self.fx_depth, self.fy_depth, self.cx_depth, self.cy_depth)
         # Create open3d point cloud from depth image
-        point_cloud = o3d.geometry.PointCloud.create_from_depth_image(depth_img, camera_intrinsic)
         return point_cloud
-def display_pcd(pcd_data, use_matplotlib=True):
-    if use_matplotlib:
-        fig = plt.figure()
-        ax = fig.add_subplot(111, projection='3d')
-    for data, clr in pcd_data:
-        points = np.array(data)
-        skip = 5
-        point_range = range(0, points.shape[0], skip) # skip points to prevent crash
-        if use_matplotlib:
-            ax.scatter(points[point_range, 0], points[point_range, 1], points[point_range, 2], c='r', marker='o')
-        if not use_matplotlib:
-            pcd_o3d = o3d.geometry.PointCloud()  # create point cloud object
-            pcd_o3d.points = o3d.utility.Vector3dVector(points)  # set pcd_np as the point cloud points
-            # Visualize:
-            o3d.visualization.draw_geometries([pcd_o3d])
-    if use_matplotlib:
-        ax.set_xlabel('X Label')
-        ax.set_ylabel('Y Label')
-        ax.set_zlabel('Z Label')
-        ax.view_init(elev=90, azim=0, roll=0)
-        plt.show()
-    if not use_matplotlib:
-        o3d.visualization.draw_geometries([pcd_o3d])
 if __name__ == "__main__":
     depth_img_path = "assets/images/depth_map_p1.png"

 import numpy as np
 import matplotlib.pyplot as plt
 import open3d as o3d
+import plotly.graph_objects as go
     def generate_point_cloud(self, depth_img, normalize=False):
+        depth_img = np.array(depth_img)
         if normalize:
             # normalizing depth image
         # convert depth to point cloud
         # point_cloud = self.conver_to_point_cloud(depth_img)
+        # depth_image = o3d.geometry.Image(depth_img)
+        depth_image = o3d.geometry.Image(np.ascontiguousarray(depth_img))
+        # # Create open3d camera intrinsic object
+        # intrinsic_matrix = np.array([[self.fx_depth, 0, self.cx_depth], [0, self.fy_depth, self.cy_depth], [0, 0, 1]])
+        # camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
+        # # camera_intrinsic.intrinsic_matrix = intrinsic_matrix
+        # camera_intrinsic.set_intrinsics(640, 480, self.fx_depth, self.fy_depth, self.cx_depth, self.cy_depth)
+        # camera settings
+        # camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
+        #     depth_img.shape[0], depth_img.shape[1], 500, 500, depth_img.shape[0] / 2, depth_img.shape[1] / 2
+        # )
         # Create open3d point cloud from depth image
+        point_cloud = o3d.geometry.PointCloud.create_from_depth_image(depth_image,
+        o3d.camera.PinholeCameraIntrinsic( o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
         return point_cloud
+# def display_pcd(pcd_data, use_matplotlib=True):
+#     if use_matplotlib:
+#         fig = plt.figure()
+#         ax = fig.add_subplot(111, projection='3d')
+#     for data, clr in pcd_data:
+#         # points = np.array(data)
+#         points = np.asarray(data.points)
+#         skip = 5
+#         point_range = range(0, points.shape[0], skip) # skip points to prevent crash
+#         if use_matplotlib:
+#             ax.scatter(points[point_range, 0], points[point_range, 1], points[point_range, 2]*100, c=list(clr).append(1), marker='o')
+#         # if not use_matplotlib:
+#         #     pcd_o3d = o3d.geometry.PointCloud()  # create point cloud object
+#         #     pcd_o3d.points = o3d.utility.Vector3dVector(points)  # set pcd_np as the point cloud points
+#         #     # Visualize:
+#         #     o3d.visualization.draw_geometries([pcd_o3d])
+#     if use_matplotlib:
+#         ax.set_xlabel('X Label')
+#         ax.set_ylabel('Y Label')
+#         ax.set_zlabel('Z Label')
+#         ax.view_init(elev=-90, azim=0, roll=-90)
+#         # plt.show()
+#         return fig
+#     if not use_matplotlib:
+#         o3d.visualization.draw_geometries([pcd_o3d])
+def display_pcd(pcd_data):  # Build and return a Plotly figure with one 3-D scatter trace per (point cloud, colour) pair in pcd_data.
+    fig = go.Figure()
+    for data, clr in pcd_data:  # each item unpacks into a point-cloud object and its colour
+        points = np.asarray(data.points)  # reads the `.points` attribute as an array; presumably an Open3D point cloud -- confirm against caller
+        skip = 1  # sampling stride; 1 keeps every point
+        point_range = range(0, points.shape[0], skip)  # row indices of the points that get plotted
+        fig.add_trace(go.Scatter3d(
+            x=points[point_range, 0],
+            y=points[point_range, 1],
+            z=points[point_range, 2]*100,  # z is scaled by 100 before plotting; the reason is not shown here -- TODO confirm
+            mode='markers',
+            marker=dict(
+                size=1,
+                color='rgb'+str(clr),  # assumes str(clr) renders as "(r, g, b)", i.e. clr is a tuple -- TODO confirm
+                opacity=1
+            )
+        ))
+    fig.update_layout(
+        scene=dict(
+            xaxis_title='X Label',
+            yaxis_title='Y Label',
+            zaxis_title='Z Label',
+            camera=dict(
+                eye=dict(x=0, y=0, z=-1),  # initial camera eye position set to (0, 0, -1)
+                # up=dict(x=0, y=0, z=1),
+            )
+        )
+    )
+    return fig  # the figure is returned, not shown, so the caller decides how to render it
 if __name__ == "__main__":
     depth_img_path = "assets/images/depth_map_p1.png"

utils.py CHANGED Viewed

@@ -27,7 +27,8 @@ def draw_depth_info(image, depth_map, objects_data):
         center = data[2]
         mask = data[3]
         _, depth = get_masked_depth(depth_map, mask)
-        cv2.putText(image, str(round(depth*10, 2))+'m', center, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
     return image
@@ -35,7 +36,7 @@ def generate_obj_pcd(depth_map, objects_data):
     objs_pcd = []
     pcd_generator = PointCloudGenerator()
-    for data in objects_data[:2]:
         mask = data[3]
         cls_clr = data[4]
         masked_depth = depth_map*mask

         center = data[2]
         mask = data[3]
         _, depth = get_masked_depth(depth_map, mask)
+        cv2.rectangle(image, (center[0]-15, center[1]-15), (center[0]+(len(str(round(depth*10, 2))+'m')*12), center[1]+15), data[4], -1)
+        cv2.putText(image, str(round(depth*10, 2))+'m', (center[0]-5, center[1]+5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
     return image
     objs_pcd = []
     pcd_generator = PointCloudGenerator()
+    for data in objects_data:
         mask = data[3]
         cls_clr = data[4]
         masked_depth = depth_map*mask