vincentqyw commited on
Commit
6d69218
·
1 Parent(s): d47224a
Files changed (7) hide show
  1. README.md +3 -2
  2. app.py +8 -0
  3. requirements.txt +7 -0
  4. src/__init__.py +5 -0
  5. src/comfy_ui.py +397 -0
  6. src/gradio_ui.py +188 -0
  7. src/omni_processor.py +225 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Omni Sfm
3
  emoji: 💻
4
  colorFrom: purple
5
  colorTo: purple
@@ -9,4 +9,5 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
1
  ---
2
+ title: Omni SFM
3
  emoji: 💻
4
  colorFrom: purple
5
  colorTo: purple
 
9
  pinned: false
10
  ---
11
 
12
+ Check out the project repository at https://github.com/Vincentqyw/omni-sfm
13
+
app.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
from src.gradio_ui import OmniConverterUI

if __name__ == "__main__":
    # Build the Gradio interface and serve it on all interfaces so the app
    # is reachable from outside a container (e.g. a Hugging Face Space,
    # which expects port 7860). share=False: no public Gradio tunnel.
    ui = OmniConverterUI()
    app = ui.create_interface()
    app.queue().launch(
        server_name="0.0.0.0", server_port=7860, share=False
    )
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ loguru
3
+ numpy
4
+ opencv-python
5
+ Pillow
6
+ py360convert
7
+ scipy
8
+ torch
9
+ tqdm
src/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
from .comfy_ui import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
from .gradio_ui import OmniConverterUI
from .omni_processor import OmniVideoProcessor

# Public API: export everything imported above, not just the ComfyUI
# mappings — OmniConverterUI and OmniVideoProcessor are imported here
# precisely so package users can reach them.
__all__ = [
    "NODE_CLASS_MAPPINGS",
    "NODE_DISPLAY_NAME_MAPPINGS",
    "OmniConverterUI",
    "OmniVideoProcessor",
]
src/comfy_ui.py ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ import cv2
6
+ import numpy as np
7
+ import torch
8
+ from PIL import Image
9
+
10
+ try:
11
+ from .omni_processor import OmniVideoProcessor
12
+ except ImportError:
13
+ print(
14
+ "Warning: omni_processor not found, some functionality may be limited",
15
+ file=sys.stderr,
16
+ )
17
+ try:
18
+ from .read_write_model import read_model
19
+ except ImportError:
20
+ print(
21
+ "Warning: read_write_model not found, some functionality may be limited",
22
+ file=sys.stderr,
23
+ )
24
+
25
+
26
class OmniParameterControls:
    """ComfyUI node that assembles the OMNI_PARAMS dict for the pipeline."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "frame_interval": (
                    "INT",
                    {"default": 24, "min": 1, "max": 100},
                ),
                "width": ("INT", {"default": 640, "min": 100, "max": 2000}),
                "height": ("INT", {"default": 640, "min": 100, "max": 2000}),
                "cx": ("FLOAT", {"default": 320.0, "min": 0.0, "max": 2000.0}),
                "cy": ("FLOAT", {"default": 320.0, "min": 0.0, "max": 2000.0}),
                "fov_h": (
                    "FLOAT",
                    {"default": 90.0, "min": 30.0, "max": 180.0},
                ),
                "fov_v": (
                    "FLOAT",
                    {"default": 90.0, "min": 30.0, "max": 180.0},
                ),
                "base_pitch": (
                    "FLOAT",
                    {"default": 35.0, "min": -90.0, "max": 90.0},
                ),
                "yaw_steps": ("INT", {"default": 4, "min": 1, "max": 12}),
                "yaw_offset": (
                    "FLOAT",
                    {"default": 0.0, "min": -180.0, "max": 180.0},
                ),
            },
            "optional": {
                "pano_projection": (
                    ["equirectangular", "cubemap"],
                    {"default": "equirectangular"},
                ),
                "pano_quality": (
                    ["low", "medium", "high"],
                    {"default": "medium"},
                ),
                "stabilize": ("BOOLEAN", {"default": True}),
            },
        }

    RETURN_TYPES = ("OMNI_PARAMS",)
    FUNCTION = "get_params"
    CATEGORY = "Omnidirectional Video"

    def get_params(
        self,
        frame_interval,
        width,
        height,
        fov_h,
        fov_v,
        base_pitch,
        yaw_steps,
        yaw_offset,
        **kwargs,
    ):
        """Build the parameter dict, generating one view per (pitch, yaw).

        Views lie on two pitch rings — +base_pitch first, then -base_pitch —
        each holding `yaw_steps` equally spaced yaw angles shifted by
        `yaw_offset` and wrapped into (-180, 180].
        """
        step = 360.0 / yaw_steps
        views = {}
        for pitch in (base_pitch, -base_pitch):
            for idx in range(yaw_steps):
                yaw = (idx * step + yaw_offset) % 360
                if yaw > 180:
                    yaw -= 360
                views[f"pitch_{pitch}_yaw_{round(yaw,1)}"] = (pitch, yaw)

        params = {
            "frame_interval": frame_interval,
            "width": width,
            "height": height,
            "fov_h": fov_h,
            "fov_v": fov_v,
            "views": views,
        }
        # Optional / extra inputs (cx, cy, pano_projection, ...) pass through.
        params.update(kwargs)
        return (params,)
117
+
118
+
119
class OmniVideoProcessorNode:
    """ComfyUI node that splits an omnidirectional video into pinhole views."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "omni_video": ("IMAGE",),
                "omni_params": ("OMNI_PARAMS",),
            }
        }

    RETURN_TYPES = ("OMNI_PROCESSED",)
    FUNCTION = "process_video"
    CATEGORY = "Omnidirectional Video"

    def process_video(self, omni_video, omni_params):
        """Run OmniVideoProcessor on the input frames.

        Args:
            omni_video: Frame batch (ComfyUI IMAGE tensor) of the panorama
                video — forwarded unchanged to OmniVideoProcessor.
            omni_params: Parameter dict produced by OmniParameterControls.

        Returns:
            One-tuple wrapping a dict with the output directory, the
            extracted panoramic frames, and the generated pinhole views.
        """
        # Removed an unused `import tempfile`; only gettempdir is needed.
        import time
        from tempfile import gettempdir

        # Unique per-run output directory under the system temp dir.
        run_timestamp = time.strftime("%Y%m%d-%H%M%S")
        output_dir = Path(gettempdir()) / f"omni_output_{run_timestamp}"
        output_dir.mkdir(parents=True, exist_ok=True)

        processor = OmniVideoProcessor(omni_params)
        panoramic_frames, pinhole_images_data = processor.process_video(
            omni_video, output_dir
        )
        result = {
            "output_dir": str(output_dir),
            "panoramic_frames": panoramic_frames,
            "pinhole_views": pinhole_images_data,
        }

        return (result,)
153
+
154
+
155
class OmniReconstructionNode:
    """ComfyUI node that runs COLMAP sparse reconstruction on pinhole views."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "omni_processed": ("OMNI_PROCESSED",),
                "colmap_path": ("STRING", {"default": "colmap"}),
                "quality": (
                    ["low", "medium", "high", "extreme"],
                    {"default": "medium"},
                ),
            }
        }

    RETURN_TYPES = ("RECONSTRUCTION", "model_file")
    RETURN_NAMES = ("reconstruction", "model_file")
    FUNCTION = "run_reconstruction"
    CATEGORY = "Omnidirectional Video"

    def run_reconstruction(self, omni_processed, colmap_path, quality):
        """Run COLMAP feature extraction, matching and mapping; export a PLY.

        Args:
            omni_processed: Result dict from OmniVideoProcessorNode
                (must contain "output_dir").
            colmap_path: Path to the COLMAP executable.
            quality: Quality preset (currently not used by the commands below).

        Returns:
            Tuple of (sparse model directory, path to the exported sparse.ply).

        Raises:
            RuntimeError: If any COLMAP command exits with non-zero status.
        """
        output_dir = Path(omni_processed["output_dir"])
        image_dir = output_dir / "pinhole_images" / "images"
        db_path = output_dir / "database.db"
        sparse_dir = output_dir / "sparse"
        dense_dir = output_dir / "dense"

        # Create necessary directories
        sparse_dir.mkdir(exist_ok=True)
        dense_dir.mkdir(exist_ok=True)

        # NOTE(review): commands run through the shell via os.system, with
        # colmap_path and the derived paths interpolated into the string.
        # If these ever come from untrusted input, switch to
        # subprocess.run([...], shell=False) with an argument list.
        cmds = [
            f'"{colmap_path}" feature_extractor --database_path "{db_path}" --image_path "{image_dir}" --ImageReader.camera_model PINHOLE --ImageReader.single_camera_per_folder 1',
            f'"{colmap_path}" sequential_matcher --database_path "{db_path}" --SequentialMatching.loop_detection 1',
            f'"{colmap_path}" mapper --database_path "{db_path}" --image_path "{image_dir}" --output_path "{sparse_dir}" --Mapper.ba_refine_focal_length 0 --Mapper.ba_refine_principal_point 0 --Mapper.ba_refine_extra_params 0',
        ]

        for cmd in cmds:
            print(f"Executing: {cmd}")
            ret = os.system(cmd)
            if ret != 0:
                raise RuntimeError(f"Command failed with exit code {ret}: {cmd}")

        # Export the sparse point cloud. Fix: previously the returned
        # sparse.ply path was announced but the writer was commented out,
        # so the file never existed.
        cameras, images, points3D = read_model(sparse_dir / "0")
        sparse_ply_path = sparse_dir / "0" / "sparse.ply"
        with open(sparse_ply_path, "w") as f:
            f.write("ply\n")
            f.write("format ascii 1.0\n")
            f.write(f"element vertex {len(points3D)}\n")
            f.write("property float x\n")
            f.write("property float y\n")
            f.write("property float z\n")
            f.write("property uchar red\n")
            f.write("property uchar green\n")
            f.write("property uchar blue\n")
            f.write("end_header\n")
            for pts in points3D.values():
                x, y, z = pts.xyz
                r, g, b = pts.rgb
                f.write(f"{x} {y} {z} {int(r)} {int(g)} {int(b)}\n")
        print(f"Generated sparse point cloud at: {sparse_ply_path}")
        return (
            str(sparse_dir / "0"),
            str(sparse_ply_path),
        )
232
+
233
+
234
class OmniPreviewNode:
    """ComfyUI node that renders a simple placeholder preview image."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "reconstruction": ("RECONSTRUCTION",),
                "model_file": ("model_file",),
            },
            "optional": {
                "show_type": (
                    ["input_frame", "reconstruction", "mesh", "model_file"],
                    {"default": "input_frame"},
                ),
                "view_yaw": (
                    "FLOAT",
                    {"default": 0.0, "min": -180.0, "max": 180.0},
                ),
                "view_pitch": (
                    "FLOAT",
                    {"default": 0.0, "min": -90.0, "max": 90.0},
                ),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "generate_preview"
    CATEGORY = "Omnidirectional Video"

    def _create_placeholder_preview(self, text):
        """Return a 640x480 dark PIL image with `text` drawn near the center."""
        img = Image.new("RGB", (640, 480), (30, 30, 50))
        try:
            from PIL import ImageDraw, ImageFont

            draw = ImageDraw.Draw(img)
            try:
                font = ImageFont.truetype("Arial.ttf", 40)
            # Fix: was a bare `except:`; ImageFont.truetype raises OSError
            # when the font file is missing — catch only that.
            except OSError:
                font = ImageFont.load_default()
            text_width = draw.textlength(text, font=font)
            position = ((640 - text_width) // 2, 220)
            draw.text(position, text, fill=(200, 200, 255), font=font)
        except ImportError:
            # Best-effort: without ImageDraw/ImageFont, return the plain canvas.
            pass
        return img

    def generate_preview(self, show_type="input_frame", view_yaw=0.0, view_pitch=0.0, **kwargs):
        """Return a (1, 480, 640, 3) float32 tensor preview in [0, 1].

        When `show_type` names an artifact whose path exists (passed via
        kwargs), a "<Name> Ready" placeholder is rendered; otherwise a
        generic "No Preview Available" image is returned.
        """
        blank_image = self._create_placeholder_preview("No Preview Available")

        def to_tensor(img):
            img = img.convert("RGB").resize((640, 480))
            return torch.from_numpy(np.array(img).astype(np.float32) / 255.0)[None,]

        if show_type in ["reconstruction", "mesh", "model_file"]:
            file_path = kwargs.get(show_type)
            if file_path and Path(file_path).exists():
                text = f"{show_type.replace('_', ' ').title()} Ready"
                image = self._create_placeholder_preview(text)
                return (to_tensor(image),)

        return (to_tensor(blank_image),)
294
+
295
+
296
+ # NEW NODE FOR ADVANCED VISUALIZATION
297
class OmniAdvancedPreviewNode:
    """ComfyUI node that previews paginated batches of pinhole/panorama frames."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "omni_processed": ("OMNI_PROCESSED",),
                "show_type": (["Pinhole Images", "Panoramic Frames"],),
                "max_items_to_show": (
                    "INT",
                    {"default": 8, "min": 1, "max": 64},
                ),
                "start_index": ("INT", {"default": 0, "min": 0}),
                "enable_annotation": ("BOOLEAN", {"default": True}),
            }
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "generate_preview_batch"
    CATEGORY = "Omnidirectional Video"

    @staticmethod
    def _blank_batch():
        """Return a 1x256x256 black image batch used when nothing can be shown."""
        blank_image = Image.new("RGB", (256, 256), "black")
        return torch.from_numpy(np.array(blank_image).astype(np.float32) / 255.0)[None,]

    def generate_preview_batch(
        self,
        omni_processed,
        show_type,
        max_items_to_show,
        start_index,
        enable_annotation,
    ):
        """Build an IMAGE batch from a slice of the processed results.

        Items may carry in-memory arrays under "image"/"frame" or a file
        path string; path items are loaded with cv2 and converted to RGB.
        Pinhole items are optionally annotated with pitch/yaw/size text.
        """
        images_to_process = []
        if show_type == "Pinhole Images" and "pinhole_views" in omni_processed:
            images_to_process = omni_processed["pinhole_views"]
        elif show_type == "Panoramic Frames" and "panoramic_frames" in omni_processed:
            images_to_process = omni_processed["panoramic_frames"]

        if not images_to_process:
            return (self._blank_batch(),)

        # Pagination window over the selected item list.
        subset = images_to_process[start_index : start_index + max_items_to_show]

        output_images = []
        for item in subset:
            # Fix: initialise img_data so items lacking both "image" and
            # "frame" are skipped instead of raising NameError.
            img_data = None
            if isinstance(item, dict):
                if "image" in item:
                    img_data = item["image"]
                if "frame" in item:  # "frame" takes precedence when both exist
                    img_data = item["frame"]
            if isinstance(img_data, str):
                # Fix: cv2.imread returns None on failure — check before
                # cvtColor, which would otherwise raise on a None input.
                img_data = cv2.imread(img_data)
                if img_data is not None:
                    img_data = cv2.cvtColor(img_data, cv2.COLOR_BGR2RGB)
            if img_data is None:
                print(f"Warning: Image data is None for item {item}")
                continue
            pil_img = Image.fromarray(img_data)

            if show_type == "Pinhole Images" and enable_annotation:
                from PIL import ImageDraw, ImageFont

                draw = ImageDraw.Draw(pil_img)
                try:
                    font = ImageFont.truetype("arial.ttf", 20)
                except IOError:
                    font = ImageFont.load_default()

                text = (
                    f"P: {item['pitch']:.1f}, Y: {item['yaw']:.1f}\n"
                    f"Size: {item['width']}x{item['height']}\n"
                    f"Pano Idx: {item['pano_index']}"
                )

                draw.text((10, 10), text, font=font, fill="yellow")

            img_tensor = torch.from_numpy(np.array(pil_img).astype(np.float32) / 255.0)
            output_images.append(img_tensor)

        if not output_images:
            return (self._blank_batch(),)

        return (torch.stack(output_images),)
378
+
379
+
380
# Registry mapping ComfyUI node identifiers to their implementing classes.
NODE_CLASS_MAPPINGS = {
    # "OmniLoadVideoUpload": OmniLoadVideoUpload,
    "OmniParameterControls": OmniParameterControls,
    "OmniVideoProcessor": OmniVideoProcessorNode,
    "OmniReconstruction": OmniReconstructionNode,
    "OmniPreview": OmniPreviewNode,  # Keeping the old one for simple previews
    "OmniAdvancedPreview": OmniAdvancedPreviewNode,  # Adding the new one
}

# Human-readable names shown in the ComfyUI node picker; keys must match
# NODE_CLASS_MAPPINGS exactly.
NODE_DISPLAY_NAME_MAPPINGS = {
    # "OmniLoadVideoUpload": "Load Omni Video Upload",
    "OmniParameterControls": "Omnidirectional Parameters",
    "OmniVideoProcessor": "Process Omnidirectional Video",
    "OmniReconstruction": "Run COLMAP Reconstruction",
    "OmniPreview": "Omni Model Preview",  # Renamed for clarity
    "OmniAdvancedPreview": "Omni Advanced Preview",  # New node's display name
}
src/gradio_ui.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from pathlib import Path
3
+
4
+ import cv2
5
+ import gradio as gr
6
+ from PIL import Image
7
+
8
+ from .omni_processor import OmniVideoProcessor
9
+
10
+
11
class OmniConverterUI:
    """Gradio front-end for converting omnidirectional videos to pinhole views."""

    def __init__(self):
        self.processor = OmniVideoProcessor()
        # Snapshot of the processor defaults used to seed the UI widgets.
        self.default_params = self.processor.params.copy()
        # Cap how many images the gallery renders to keep the UI responsive.
        self.max_gallery_items = 20

    def create_interface(self):
        """Create Gradio interface"""
        with gr.Blocks(title="Omnidirectional Video to Pinhole Converter") as demo:
            gr.Markdown("## Omnidirectional Video to Pinhole Converter")

            with gr.Row():
                with gr.Column():
                    # Video input
                    video_input = gr.File(label="Upload Video", type="filepath")

                    # Submit button
                    submit_btn = gr.Button("Convert", variant="primary")

                    # Frame extraction settings
                    with gr.Accordion("Frame Extraction", open=True):
                        frame_interval = gr.Slider(
                            1,
                            100,
                            value=self.default_params["frame_interval"],
                            label="Frame Interval",
                            interactive=True,
                        )

                    # Pinhole camera settings
                    with gr.Accordion("Pinhole Parameters", open=True):
                        with gr.Row():
                            image_width = gr.Slider(
                                100,
                                2000,
                                value=self.default_params["width"],
                                label="Image Width",
                                interactive=True,
                            )
                            image_height = gr.Slider(
                                100,
                                2000,
                                value=self.default_params["height"],
                                label="Image Height",
                                interactive=True,
                            )
                        with gr.Row():
                            cx = gr.Slider(
                                50,
                                1000,
                                value=self.default_params["cx"],
                                label="Principal Point X",
                                interactive=True,
                            )
                            cy = gr.Slider(
                                50,
                                1000,
                                value=self.default_params["cy"],
                                label="Principal Point Y",
                                interactive=True,
                            )
                        with gr.Row():
                            fov_h = gr.Slider(
                                30,
                                180,
                                value=self.default_params["fov_h"],
                                label="Horizontal FOV (deg)",
                                interactive=True,
                            )
                            fov_v = gr.Slider(
                                30,
                                180,
                                value=self.default_params["fov_v"],
                                label="Vertical FOV (deg)",
                                interactive=True,
                            )
                        with gr.Row():
                            fx = gr.Slider(
                                50,
                                1000,
                                value=self.default_params["fx"],
                                label="Focal Length X",
                                interactive=True,
                            )
                            fy = gr.Slider(
                                50,
                                1000,
                                value=self.default_params["fy"],
                                label="Focal Length Y",
                                interactive=True,
                            )

                    # View selection
                    with gr.Accordion("Custom View editions", open=False):
                        with gr.Row():
                            custom_pitch = gr.Slider(-90, 90, value=0, label="Custom Pitch")
                            custom_yaw = gr.Slider(-180, 180, value=0, label="Custom Yaw")
                        add_custom = gr.Button("Add Custom View")

                with gr.Column():
                    # Results display
                    output_gallery = gr.Gallery(
                        label="Generated Pinhole Images",
                        columns=len(self.default_params["views"]),  # Use initial value
                        object_fit="contain",
                        height="auto",
                    )
                    view_state_display = gr.JSON(
                        label="Current Views",
                        value=self.default_params["views"].copy(),
                    )

            # Initialize views state
            views_state = gr.State(self.default_params["views"].copy())

            # Event handlers
            add_custom.click(
                fn=self._update_views,
                inputs=[custom_pitch, custom_yaw, views_state],
                outputs=[views_state, view_state_display],
            )

            submit_btn.click(
                fn=self._run_conversion,
                inputs=[
                    video_input,
                    frame_interval,
                    fx,
                    fy,
                    cx,
                    cy,
                    image_width,
                    image_height,
                    fov_h,
                    fov_v,
                    views_state,
                ],
                outputs=output_gallery,
            )

        return demo

    def _update_views(self, pitch, yaw, current_views):
        """Update views state with new custom view"""
        new_views = {**current_views, f"pitch_{pitch}_yaw_{yaw}": (pitch, yaw)}
        return new_views, new_views

    def _run_conversion(self, video_file, *params):
        """Run conversion and return a gallery update with the pinhole views.

        `params` arrives in the order wired up in create_interface and is
        zipped into a dict keyed by the processor's parameter names.
        """
        # Fix: guard against submitting with no file selected.
        if video_file is None:
            return gr.update(value=[], visible=False)

        param_names = [
            "frame_interval",
            "fx",
            "fy",
            "cx",
            "cy",
            "width",
            "height",
            "fov_h",
            "fov_v",
            "views",
        ]
        params_dict = dict(zip(param_names, params))

        self.processor.set_params(params_dict)

        output_dir = Path.cwd() / "outputs" / time.strftime("%Y%m%d%H%M%S")
        output_dir.mkdir(parents=True, exist_ok=True)

        # Fix: gr.File(type="filepath") passes a plain string path, which has
        # no `.name` attribute; older Gradio versions passed a tempfile
        # wrapper exposing `.name` — accept both.
        video_path = video_file if isinstance(video_file, str) else video_file.name

        pano_images, pinhole_images_data = self.processor.process_video(
            video_path, output_dir
        )
        image_list_for_gallery = [
            (
                Image.fromarray(cv2.cvtColor(img_info["image"], cv2.COLOR_BGR2RGB)),
                "Frame {}, View: {}".format(img_info["pano_index"], img_info["view_name"]),
            )
            for img_info in pinhole_images_data
        ][: self.max_gallery_items]
        if not image_list_for_gallery:
            return gr.update(value=[], visible=False)
        return gr.update(columns=len(params_dict["views"]), value=image_list_for_gallery)
src/omni_processor.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import py360convert
7
+ import torch
8
+ from scipy.spatial.transform import Rotation as R
9
+ from tqdm import tqdm
10
+
11
+
12
def compute_focal_length(image_size, fov_deg):
    """Return the pinhole focal length in pixels for a given span and FOV.

    Derived from f = (size / 2) / tan(fov / 2) with `fov_deg` in degrees.
    """
    half_fov_rad = np.deg2rad(fov_deg) * 0.5
    return 0.5 * image_size / np.tan(half_fov_rad)
14
+
15
+
16
class OmniVideoProcessor:
    """Converts omnidirectional (equirectangular) video into pinhole views.

    Frames are extracted at a fixed interval, each panorama is reprojected
    into a configurable set of pinhole views, and COLMAP-compatible camera
    and rig metadata are written next to the images.
    """

    # Class-level defaults; copied (including nested containers) per instance.
    default_params = {
        "fx": 320.0,
        "fy": 320.0,
        "cx": 320.0,
        "cy": 320.0,
        "height": 640,
        "width": 640,
        "fov_h": 90,
        "fov_v": 90,
        "frame_interval": 24,
        "num_steps_yaw": 4,
        "pitches_deg": [-35.0, 35.0],
        "views": {
            "pitch_35_yaw_0": (35, 0),
            # Fix: the yaw_90 entries previously held 60 despite the key
            # naming yaw 90, breaking the key <-> (pitch, yaw) convention.
            "pitch_35_yaw_90": (35, 90),
            "pitch_35_yaw_-90": (35, -90),
            "pitch_35_yaw_180": (35, 180),
            "pitch_-35_yaw_0": (-35, 0),
            "pitch_-35_yaw_90": (-35, 90),
            "pitch_-35_yaw_-90": (-35, -90),
            "pitch_-35_yaw_180": (-35, 180),
        },
    }

    def __init__(self, params=None):
        """Initialize with `params`, or a private copy of the class defaults.

        Fix: the previous signature used a mutable default (`params={}`)
        and a shallow `.copy()` of the class attribute, so every instance
        shared — and could mutate — the same nested "views" dict.
        """
        if params:
            self.params = params
        else:
            self.params = {
                key: dict(value)
                if isinstance(value, dict)
                else list(value)
                if isinstance(value, list)
                else value
                for key, value in self.default_params.items()
            }
        # The first configured view acts as the rig's reference sensor.
        self.ref_sensor = next(iter(self.params["views"]))

    def set_params(self, params):
        """Replace the full parameter dict."""
        self.params = params

    def process_video(self, video_or_path, output_dir):
        """Extract panoramic frames and generate pinhole views.

        Args:
            video_or_path: Video file path (str or Path), or an in-memory
                frame batch (torch.Tensor / np.ndarray, values in [0, 1]).
            output_dir: Directory where images and metadata are written.

        Returns:
            Tuple (pano_images, pinhole_views_data).

        Raises:
            IOError: If the video file cannot be opened.
            ValueError: If `video_or_path` has an unsupported type.
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Fix: Path inputs were previously rejected even though the error
        # message claimed they were supported.
        if isinstance(video_or_path, (str, Path)):
            video = cv2.VideoCapture(str(video_or_path))
            if not video.isOpened():
                raise IOError(f"Cannot open video file: {video_or_path}")
            try:
                pano_images = self._extract_frames(video, output_dir)
            finally:
                # Release the capture even if extraction fails mid-way.
                video.release()
        elif isinstance(video_or_path, (torch.Tensor, np.ndarray)):
            pano_images = self._extract_frames_torch(video_or_path)
        else:
            raise ValueError(
                "video_or_path must be a str/Path or a torch.Tensor/np.ndarray"
            )

        pinhole_images_data = self._generate_pinhole_images(pano_images, output_dir)
        return pano_images, pinhole_images_data

    def _extract_frames(self, video, output_dir):
        """Read frames from an opened cv2.VideoCapture at `frame_interval` spacing."""
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        pano_images = []

        for frame_idx in tqdm(range(frame_count), desc="Extracting Frames"):
            ret, frame = video.read()
            if not ret:
                break
            if frame_idx % self.params["frame_interval"] == 0:
                pano_images.append({"image": frame, "idx": frame_idx})
        return pano_images

    def _extract_frames_torch(self, video_tensor):
        """Extract frames from an in-memory batch at `frame_interval` spacing.

        Fix: accepts np.ndarray as well as torch.Tensor, matching what
        process_video routes here. Values are assumed normalized to [0, 1]
        (RGB order — TODO confirm against the ComfyUI IMAGE convention).
        """
        if isinstance(video_tensor, torch.Tensor):
            video_tensor = video_tensor.detach().cpu().numpy()
        elif not isinstance(video_tensor, np.ndarray):
            raise ValueError("video_tensor must be a torch.Tensor or np.ndarray")

        pano_images = []
        interval = self.params["frame_interval"]
        for frame_idx in tqdm(range(video_tensor.shape[0]), desc="Extracting Frames"):
            if frame_idx % interval == 0:
                img = (video_tensor[frame_idx] * 255.0).astype(np.uint8)
                pano_images.append({"image": img, "idx": frame_idx})

        return pano_images

    def _generate_pinhole_images(self, pano_images, output_dir):
        """Reproject every panorama into all configured views; write metadata.

        Returns a list of per-view records (image array, pose, size, path).
        """
        output_pinhole_dir = output_dir / "pinhole_images" / "images"
        output_pinhole_dir.mkdir(parents=True, exist_ok=True)

        camera_params_list = []
        camera_rig_params = {}
        pinhole_views = []

        for pano_info in tqdm(pano_images, desc="Generating Pinhole Views"):
            pano_idx, pano_image = pano_info["idx"], pano_info["image"]
            for view_name, (pitch, yaw) in self.params["views"].items():
                pinhole_image = self._convert_to_pinhole(pano_image, pitch, yaw)

                # One sub-directory per view so COLMAP can treat each view
                # as its own camera (single_camera_per_folder).
                save_dir = output_pinhole_dir / view_name
                save_dir.mkdir(parents=True, exist_ok=True)
                save_path = save_dir / f"{pano_idx:06d}.jpg"
                cv2.imwrite(str(save_path), pinhole_image)

                h, w = pinhole_image.shape[:2]
                pinhole_views.append(
                    {
                        "image": pinhole_image,
                        "pano_index": pano_idx,
                        "view_name": view_name,
                        "pitch": pitch,
                        "yaw": yaw,
                        "width": w,
                        "height": h,
                        "save_path": str(save_path),
                    }
                )

                is_ref = view_name == self.ref_sensor
                cam_params = self._create_camera_params(
                    save_path, pano_idx, view_name, pitch, yaw, is_ref
                )
                camera_params_list.append(cam_params)

                if view_name not in camera_rig_params:
                    camera_rig_params[view_name] = {
                        "image_prefix": view_name,
                        "yaw": yaw,
                        "pitch": pitch,
                        "ref_sensor": is_ref,
                    }

        self._save_camera_params(
            camera_params_list,
            output_dir / "pinhole_images" / "camera_params.json",
        )
        self._save_colmap_camera_rig(
            camera_rig_params, output_dir / "pinhole_images" / "rig_config.json"
        )

        return pinhole_views

    def _convert_to_pinhole(self, pano_image, pitch, yaw):
        """Reproject an equirectangular image to a pinhole view via py360convert."""
        return py360convert.e2p(
            e_img=pano_image,
            fov_deg=(self.params["fov_h"], self.params["fov_v"]),
            u_deg=yaw,
            v_deg=pitch,
            out_hw=(self.params["height"], self.params["width"]),
            in_rot_deg=0,
            mode="bilinear",
        )

    def _create_camera_params(
        self, save_path: Path, pano_idx, view_name, pitch, yaw, ref_sensor=None
    ):
        """Build the per-image camera metadata record (focal from FOV)."""
        fx = compute_focal_length(self.params["width"], self.params["fov_h"])
        fy = compute_focal_length(self.params["height"], self.params["fov_v"])
        return {
            "image_name": save_path.name,
            "image_prefix": view_name,
            "fx": fx,
            "fy": fy,
            "cx": self.params["width"] / 2,
            "cy": self.params["height"] / 2,
            "height": self.params["height"],
            "width": self.params["width"],
            "fov_h": self.params["fov_h"],
            "fov_v": self.params["fov_v"],
            "yaw": yaw,
            "pitch": pitch,
            "pano_index": pano_idx,
            "ref_sensor": ref_sensor,
        }

    def _save_camera_params(self, params, output_file):
        """Serialize the camera parameter list as JSON."""
        with open(output_file, "w") as f:
            json.dump(params, f, indent=4)

    def _save_colmap_camera_rig(self, camera_rig_params, output_file):
        """Write the COLMAP rig configuration relating each view to the reference."""
        if not self.params["views"]:
            return

        ref_view_name = next(iter(self.params["views"]))
        ref_pitch, ref_yaw = self.params["views"][ref_view_name]

        # COLMAP: X right, Y down, Z forward. Euler: yaw, pitch, roll
        R_ref_world = R.from_euler("yx", [ref_yaw, ref_pitch], degrees=True)

        rig_cameras = []
        for image_prefix, view_params in camera_rig_params.items():
            R_view_world = R.from_euler(
                "yx", [view_params["yaw"], view_params["pitch"]], degrees=True
            )
            R_view_ref = R_view_world.inv() * R_ref_world  # Cam from Rig

            # Scipy quat (x,y,z,w) -> COLMAP quat (w,x,y,z)
            qvec_scipy = R_view_ref.as_quat()
            qvec_colmap = [
                qvec_scipy[3],
                qvec_scipy[0],
                qvec_scipy[1],
                qvec_scipy[2],
            ]

            cam_entry = {"image_prefix": image_prefix}
            if view_params.get("ref_sensor"):
                cam_entry["ref_sensor"] = True
            else:
                cam_entry["cam_from_rig_rotation"] = qvec_colmap
                cam_entry["cam_from_rig_translation"] = [0.0, 0.0, 0.0]
            rig_cameras.append(cam_entry)

        colmap_rig_config = [{"cameras": rig_cameras}]
        with open(output_file, "w") as f:
            json.dump(colmap_rig_config, f, indent=4)