jev-aleks committed
Commit 9e15541 · Parent(s): 975fa86

scenedino init

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .gitattributes +3 -0
  2. .gitignore +213 -0
  3. LICENSE.txt +201 -0
  4. README.md +2 -1
  5. app.py +213 -0
  6. configs/dataset/bdd_seg.yaml +2 -0
  7. configs/dataset/cityscapes_seg.yaml +2 -0
  8. configs/dataset/kitti_360_sscbench.yaml +15 -0
  9. configs/dataset/realestate10k.yaml +3 -0
  10. configs/downstream/semantic.yaml +13 -0
  11. configs/evaluate_semantic_bdd.yaml +50 -0
  12. configs/evaluate_semantic_cityscapes.yaml +50 -0
  13. configs/evaluate_semantic_kitti_360.yaml +50 -0
  14. configs/model/dino_downsampler.yaml +64 -0
  15. configs/model/dino_upsampler.yaml +64 -0
  16. configs/model/dinov2_downsampler.yaml +64 -0
  17. configs/renderer/pixelnerf.yaml +9 -0
  18. configs/train_scenedino_kitti_360.yaml +43 -0
  19. configs/train_scenedino_re10k.yaml +49 -0
  20. configs/train_semantic_kitti_360.yaml +52 -0
  21. configs/training/loss/scenedino.yaml +15 -0
  22. configs/training/loss/semantic.yaml +10 -0
  23. configs/training/optimizer/scenedino.yaml +7 -0
  24. configs/training/optimizer/semantic.yaml +7 -0
  25. configs/training/scenedino.yaml +21 -0
  26. configs/training/scheduler/scenedino.yaml +3 -0
  27. configs/training/semantic.yaml +16 -0
  28. configs/validation/scenedino.yaml +78 -0
  29. configs/validation/semantic.yaml +80 -0
  30. datasets/__init__.py +0 -0
  31. datasets/bdd/bdd_dataset.py +164 -0
  32. datasets/cityscapes/cityscapes_dataset.py +82 -0
  33. datasets/data_util.py +307 -0
  34. datasets/kitti_360/__init__.py +0 -0
  35. datasets/kitti_360/annotation.py +538 -0
  36. datasets/kitti_360/compute_kitti_360_bbox_split.py +110 -0
  37. datasets/kitti_360/kitti_360_dataset.py +1263 -0
  38. datasets/kitti_360/labels.py +200 -0
  39. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0000_sync/poses.txt +0 -0
  40. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0002_sync/poses.txt +0 -0
  41. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0003_sync/poses.txt +0 -0
  42. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0004_sync/poses.txt +0 -0
  43. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0005_sync/poses.txt +0 -0
  44. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0006_sync/poses.txt +0 -0
  45. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0007_sync/poses.txt +0 -0
  46. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0009_sync/poses.txt +0 -0
  47. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0010_sync/poses.txt +0 -0
  48. datasets/kitti_360/preprocess_kitti_360.py +81 -0
  49. datasets/kitti_360/splits/seg/test_files.txt +446 -0
  50. datasets/kitti_360/splits/seg/train_files.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,213 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+# in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. However, if you prefer,
+# you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+# refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Gradio
+.gradio
+
+# outputs and checkpoints
+out/
LICENSE.txt ADDED
@@ -0,0 +1,201 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction,
+and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by
+the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all
+other entities that control, are controlled by, or are under common
+control with that entity. For the purposes of this definition,
+"control" means (i) the power, direct or indirect, to cause the
+direction or management of such entity, whether by contract or
+otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity
+exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation
+source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical
+transformation or translation of a Source form, including but
+not limited to compiled object code, generated documentation,
+and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or
+Object form, made available under the License, as indicated by a
+copyright notice that is included in or attached to the work
+(an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object
+form, that is based on (or derived from) the Work and for which the
+editorial revisions, annotations, elaborations, or other modifications
+represent, as a whole, an original work of authorship. For the purposes
+of this License, Derivative Works shall not include works that remain
+separable from, or merely link (or bind by name) to the interfaces of,
+the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including
+the original version of the Work and any modifications or additions
+to that Work or Derivative Works thereof, that is intentionally
+submitted to Licensor for inclusion in the Work by the copyright owner
+or by an individual or Legal Entity authorized to submit on behalf of
+the copyright owner. For the purposes of this definition, "submitted"
+means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems,
+and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but
+excluding communication that is conspicuously marked or otherwise
+designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity
+on behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the
+Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+(except as stated in this section) patent license to make, have made,
+use, offer to sell, sell, import, and otherwise transfer the Work,
+where such license applies only to those patent claims licensable
+by such Contributor that are necessarily infringed by their
+Contribution(s) alone or by combination of their Contribution(s)
+with the Work to which such Contribution(s) was submitted. If You
+institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work
+or a Contribution incorporated within the Work constitutes direct
+or contributory patent infringement, then any patent licenses
+granted to You under this License for that Work shall terminate
+as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+Work or Derivative Works thereof in any medium, with or without
+modifications, and in Source or Object form, provided that You
+meet the following conditions:
+
+(a) You must give any other recipients of the Work or
+Derivative Works a copy of this License; and
+
+(b) You must cause any modified files to carry prominent notices
+stating that You changed the files; and
+
+(c) You must retain, in the Source form of any Derivative Works
+that You distribute, all copyright, patent, trademark, and
+attribution notices from the Source form of the Work,
+excluding those notices that do not pertain to any part of
+the Derivative Works; and
+
+(d) If the Work includes a "NOTICE" text file as part of its
+distribution, then any Derivative Works that You distribute must
+include a readable copy of the attribution notices contained
+within such NOTICE file, excluding those notices that do not
+pertain to any part of the Derivative Works, in at least one
+of the following places: within a NOTICE text file distributed
+as part of the Derivative Works; within the Source form or
+documentation, if provided along with the Derivative Works; or,
+within a display generated by the Derivative Works, if and
+wherever such third-party notices normally appear. The contents
+of the NOTICE file are for informational purposes only and
+do not modify the License. You may add Your own attribution
+notices within Derivative Works that You distribute, alongside
+or as an addendum to the NOTICE text from the Work, provided
+that such additional attribution notices cannot be construed
+as modifying the License.
+
+You may add Your own copyright statement to Your modifications and
+may provide additional or different license terms and conditions
+for use, reproduction, or distribution of Your modifications, or
+for any such Derivative Works as a whole, provided Your use,
+reproduction, and distribution of the Work otherwise complies with
+the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+any Contribution intentionally submitted for inclusion in the Work
+by You to the Licensor shall be under the terms and conditions of
+this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify
+the terms of any separate license agreement you may have executed
+with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+names, trademarks, service marks, or product names of the Licensor,
+except as required for reasonable and customary use in describing the
+origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+agreed to in writing, Licensor provides the Work (and each
+Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied, including, without limitation, any warranties or conditions
+of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any
+risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+whether in tort (including negligence), contract, or otherwise,
+unless required by applicable law (such as deliberate and grossly
+negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special,
+incidental, or consequential damages of any character arising as a
+result of this License or out of the use or inability to use the
+Work (including but not limited to damages for loss of goodwill,
+work stoppage, computer failure or malfunction, or any and all
+other commercial damages or losses), even if such Contributor
+has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+the Work or Derivative Works thereof, You may choose to offer,
+and charge a fee for, acceptance of support, warranty, indemnity,
+or other liability obligations and/or rights consistent with this
+License. However, in accepting such obligations, You may act only
+on Your own behalf and on Your sole responsibility, not on behalf
+of any other Contributor, and only if You agree to indemnify,
+defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason
+of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+To apply the Apache License to your work, attach the following
+boilerplate notice, with the fields enclosed by brackets "[]"
+replaced with your own identifying information. (Don't include
+the brackets!) The text should be enclosed in the appropriate
+comment syntax for the file format. We also recommend that a
+file or class name and description of purpose be included on the
+same "printed page" as the copyright notice for easier
+identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
README.md CHANGED
@@ -1,9 +1,10 @@
 ---
 title: SceneDINO
-emoji: 📈
+emoji: 🦕
 colorFrom: blue
 colorTo: pink
 sdk: gradio
+python_version: 3.10
 sdk_version: 5.35.0
 app_file: app.py
 pinned: false
app.py ADDED
@@ -0,0 +1,213 @@
+from demo_utils.utils import (load_modules,
+                              load_sample_from_path,
+                              load_sample_from_dataset,
+                              get_fov_mask,
+                              inference_3d,
+                              inference_rendered_2d)
+
+import tempfile
+import os
+import sys
+import yaml
+
+sys.path.append("./sscbench")
+from sscbench.gen_voxelgrid_npy import save_as_voxel_ply, classes_to_colors
+from download_checkpoint_hf import download_scenedino_checkpoint
+
+import torch
+
+import numpy as np
+import gradio as gr
+import open3d as o3d
+import spaces
+
+
+# Load checkpoints from Hugging Face
+download_scenedino_checkpoint("ssc-kitti-360-dino")
+download_scenedino_checkpoint("ssc-kitti-360-dinov2")
+
+# Load model, ray sampler, datasets
+ckpt_path = "out/scenedino-pretrained/seg-best-dino/"
+ckpt_name = "checkpoint.pt"
+net_v1, renderer_v1, ray_sampler_v1, test_dataset = load_modules(ckpt_path, ckpt_name)
+renderer_v1.eval()
+
+ckpt_path = "out/scenedino-pretrained/seg-best-dinov2/"
+ckpt_name = "checkpoint.pt"
+net_v2, renderer_v2, ray_sampler_v2, _ = load_modules(ckpt_path, ckpt_name)
+renderer_v2.eval()
+
+
+def convert_voxels(arr, map_dict):
+    f = np.vectorize(map_dict.__getitem__)
+    return f(arr)
+
+with open("sscbench/label_maps.yaml", "r") as f:
+    label_maps = yaml.safe_load(f)
+
+
+@spaces.GPU(duration=60)
+def demo_run(image: str,
+             backbone: str,
+             mode: str,
+             sigma_threshold: float,
+             resolution: float,
+             x_range: int,
+             y_range: int,
+             z_range: int):
+
+    if backbone == "DINO (ViT-B)":
+        net, renderer, ray_sampler = net_v1, renderer_v1, ray_sampler_v1
+    elif backbone == "DINOv2 (ViT-B)":
+        net, renderer, ray_sampler = net_v2, renderer_v2, ray_sampler_v2
+
+    prediction_mode = "stego_kmeans"
+    if mode == "Feature PCA 1-3":
+        segmentation = False
+        rgb_from_pca_dim = 0
+    elif mode == "Feature PCA 4-6":
+        segmentation = False
+        rgb_from_pca_dim = 3
+    elif mode == "Feature PCA 7-9":
+        segmentation = False
+        rgb_from_pca_dim = 6
+    elif mode == "SSC (unsup.)":
+        segmentation = True
+    elif mode == "SSC (linear)":
+        segmentation = True
+        prediction_mode = "direct_linear"
+
+    # Necessary when reading from examples? cast from str
+    sigma_threshold, resolution = float(sigma_threshold), float(resolution)
+    x_range, y_range, z_range = int(x_range), int(y_range), int(z_range)
+
+    # Too many voxels
+    max_voxel_count = 5000000
+    voxel_count = (x_range//resolution + 1) * (y_range//resolution + 1) * (z_range//resolution + 1)
+    if voxel_count > max_voxel_count:
+        raise gr.Error(f"Too many voxels ({int(voxel_count) / 1_000_000:.1f}M > {max_voxel_count / 1_000_000:.1f}M).\n" +
+                       "Reduce voxel resolution or range.", duration=5)
+
+    with torch.no_grad():
+        images, poses, projs = load_sample_from_path(image, intrinsic=None)
+
+        net.encode(images, projs, poses, ids_encoder=[0])
+        net.set_scale(0)
+
+        # 2D Features output
+        dino_full_2d, depth_2d, seg_2d = inference_rendered_2d(net, poses, projs, ray_sampler, renderer, prediction_mode)
+        net.encoder.fit_visualization(dino_full_2d.flatten(0, -2))
+
+        if segmentation:
+            output_2d = convert_voxels(seg_2d.detach().cpu(), label_maps["cityscapes_to_label"])
+            output_2d = classes_to_colors[output_2d].cpu().detach().numpy()
+        else:
+            output_2d = net.encoder.transform_visualization(dino_full_2d, from_dim=rgb_from_pca_dim)
+            output_2d -= output_2d.min()
+            output_2d /= output_2d.max()
+            output_2d = output_2d.cpu().detach().numpy()
+
+        # Chunking
+        max_chunk_size = 100000
+        z_layers_per_chunk = max_chunk_size // ((x_range//resolution + 1) * (y_range//resolution + 1))
+
+        # 3D Features output
+        x_range = (-x_range/2, x_range)
+        y_range = (-y_range/2, y_range)
+        z_range = (0, z_range)
+
+        is_occupied, output_3d, fov_mask = [], [], []
+        current_z = 0
+
+        while current_z <= z_range[1]:
+            z_range_chunk = (current_z, min(current_z + z_layers_per_chunk*resolution, z_range[1]))
+            current_z += (z_layers_per_chunk+1) * resolution
+
+            xyz_chunk, dino_full_3d_chunk, sigma_3d_chunk, seg_3d_chunk = inference_3d(net, x_range, y_range, z_range_chunk, resolution, prediction_mode)
+            fov_mask_chunk = get_fov_mask(projs[0, 0], xyz_chunk)
+
+            is_occupied_chunk = sigma_3d_chunk > sigma_threshold
+
+            if segmentation:
+                output_3d_chunk = seg_3d_chunk
+            else:
+                output_3d_chunk = net.encoder.transform_visualization(dino_full_3d_chunk, from_dim=rgb_from_pca_dim)
+                output_3d_chunk -= output_3d_chunk.min()
+                output_3d_chunk /= output_3d_chunk.max()
+
+                output_3d_chunk = torch.clamp(output_3d_chunk*1.2 - 0.1, 0.0, 1.0)
+                output_3d_chunk = (255*output_3d_chunk).int()
+
+            fov_mask_chunk = fov_mask_chunk.reshape(is_occupied_chunk.shape)
+
+            is_occupied.append(is_occupied_chunk)
+            output_3d.append(output_3d_chunk)
+            fov_mask.append(fov_mask_chunk)
+
+        is_occupied = torch.cat(is_occupied, dim=2)
+        output_3d = torch.cat(output_3d, dim=2)
+        fov_mask = torch.cat(fov_mask, dim=2)
+
+    temp_dir = tempfile.gettempdir()
+    ply_path = os.path.join(temp_dir, "output.ply")
+
+    if segmentation:
+        # mapped to "unlabeled"
+        is_occupied[output_3d == 10] = 0
+        is_occupied[output_3d == 12] = 0
+
+        save_as_voxel_ply(ply_path,
+                          is_occupied.detach().cpu(),
+                          voxel_size=resolution,
+                          size=is_occupied.size(),
+                          classes=torch.Tensor(
+                              convert_voxels(
+                                  output_3d.detach().cpu(),
+                                  label_maps["cityscapes_to_label"])),
+                          fov_mask=fov_mask)
+    else:
+        save_as_voxel_ply(ply_path,
+                          is_occupied.detach().cpu(),
+                          voxel_size=resolution,
+                          size=is_occupied.size(),
+                          colors=output_3d.detach().cpu(),
+                          fov_mask=fov_mask)
+
+    mesh = o3d.io.read_triangle_mesh(ply_path)
+    glb_path = os.path.join(temp_dir, "output.glb")
+    o3d.io.write_triangle_mesh(glb_path, mesh, write_ascii=True)
+
+    del dino_full_2d, depth_2d, seg_2d
+    del dino_full_3d_chunk, sigma_3d_chunk, seg_3d_chunk, is_occupied_chunk
+    del is_occupied, output_3d, fov_mask
+
+    torch.cuda.empty_cache()
+
+    return output_2d, glb_path
+
+
+demo = gr.Interface(
+    demo_run,
+    inputs=[
+        gr.Image(label="Input image", type="filepath"),
+        gr.Radio(label="Backbone", choices=["DINO (ViT-B)", "DINOv2 (ViT-B)"]),
+        gr.Radio(label="Mode", choices=["Feature PCA 1-3", "Feature PCA 4-6", "Feature PCA 7-9", "SSC (unsup.)", "SSC (linear)"]),
+        gr.Slider(label="Density threshold", minimum=0, maximum=1, step=0.05, value=0.2),
+        gr.Slider(label="Resolution [m]", minimum=0.05, maximum=0.5, step=0.1, value=0.2),
+        gr.Slider(label="X Range [m]", minimum=1, maximum=50, step=1, value=10),
+        gr.Slider(label="Y Range [m]", minimum=1, maximum=50, step=1, value=10),
+        gr.Slider(label="Z Range [m]", minimum=1, maximum=100, step=1, value=20),
+    ],
+    outputs=[
+        gr.Image(label="Rendered 2D Visualization"),
+        gr.Model3D(label="Voxel Surface 3D Visualization",
+                   zoom_speed=0.5, pan_speed=0.5,
+                   clear_color=[0.0, 0.0, 0.0, 0.0],
+                   camera_position=[-90, 80, None],
+                   display_mode="solid"),
+    ],
+    title="SceneDINO Demo",
+    examples="demo_utils/examples",
+)

+demo.launch()
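
A note on `convert_voxels` above: it wraps a plain dict lookup with `np.vectorize` so the class-id remapping broadcasts over a whole voxel grid at once. A minimal self-contained sketch of the same pattern (the mapping values here are made up for illustration; in app.py the real mapping comes from `sscbench/label_maps.yaml`):

```python
import numpy as np

# Hypothetical id mapping for illustration only.
map_dict = {0: 7, 1: 8, 2: 11}

f = np.vectorize(map_dict.__getitem__)  # dict lookup applied elementwise
arr = np.array([[0, 1], [2, 0]])
print(f(arr))  # [[ 7  8]
               #  [11  7]]
```

Every value in the input array must be a key of the mapping, otherwise the lookup raises a KeyError.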
configs/dataset/bdd_seg.yaml ADDED
@@ -0,0 +1,2 @@
+type: "BDD_seg"
+data_path: "<PATH-BDD>"
configs/dataset/cityscapes_seg.yaml ADDED
@@ -0,0 +1,2 @@
+type: "Cityscapes_seg"
+data_path: "<PATH-CITYSCAPES>"
configs/dataset/kitti_360_sscbench.yaml ADDED
@@ -0,0 +1,15 @@
+type: "old_KITTI_360"
+data_path: "<PATH-KITTI-360>"
+pose_path: "<PATH-KITTI-360-DATA-POSES>"
+split_path: "datasets/kitti_360/splits/sscbench"
+image_size: [ 192, 640 ]
+data_stereo: true
+data_fisheye: true
+data_fc: 2
+# dilation: 10
+# color_aug: true
+fisheye_offset: [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
+stereo_offset: [0]
+is_preprocessed: true
+fisheye_rotation: -15
+data_segmentation: true
configs/dataset/realestate10k.yaml ADDED
@@ -0,0 +1,3 @@
+type: "RealEstate10K"
+data_path: "<PATH-REALESTATE-PKL>.pickle"
+image_size: [288, 512]
configs/downstream/semantic.yaml ADDED
@@ -0,0 +1,13 @@
+type: "segmentation"
+
+n_classes: 19
+gt_classes: 19
+input_dim: 384
+code_dim: 64
+
+knn_neighbors: 4
+buffer_size: 256
+patch_sample_size: 576
+
+mode: "3d"
+apply_crf: False
configs/evaluate_semantic_bdd.yaml ADDED
@@ -0,0 +1,50 @@
+defaults:
+  - dataset: bdd_seg
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: semantic
+  - validation: semantic
+  - downstream: semantic
+  - _self_
+
+training_type: "downstream_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: false
+name: "training"
+batch_size: 1
+gradient_accum_factor: 1
+num_workers: 6
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+# eval_visualize: [0, 1, 2, 3]
+
+output:
+  path: "out/evaluation-paper"
+  unique_id: evaluation-bdd
+
+checkpoint: "<PATH-FEATURE-CHECKPOINT>.pt"
+
+evaluations:
+  - type: seg
+    agg_type: unsup_seg
+    args:
+      n_classes: 19
+      gt_classes: 19
+
+downstream:
+  input_dim: 768
configs/evaluate_semantic_cityscapes.yaml ADDED
@@ -0,0 +1,50 @@
+defaults:
+  - dataset: cityscapes_seg
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: semantic
+  - validation: semantic
+  - downstream: semantic
+  - _self_
+
+training_type: "downstream_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: false
+name: "training"
+batch_size: 1
+gradient_accum_factor: 1
+num_workers: 6
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+# eval_visualize: [0, 1, 2, 3]
+
+output:
+  path: "out/evaluation-paper"
+  unique_id: evaluation-cityscapes
+
+checkpoint: "<PATH-FEATURE-CHECKPOINT>.pt"
+
+evaluations:
+  - type: seg
+    agg_type: unsup_seg
+    args:
+      n_classes: 19
+      gt_classes: 19
+
+downstream:
+  input_dim: 768
configs/evaluate_semantic_kitti_360.yaml ADDED
@@ -0,0 +1,50 @@
+defaults:
+  - dataset: kitti_360_sscbench
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: semantic
+  - validation: semantic
+  - downstream: semantic
+  - _self_
+
+training_type: "downstream_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: false
+name: "training"
+batch_size: 1
+gradient_accum_factor: 1
+num_workers: 6
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+# eval_visualize: [0, 1, 2, 3]
+
+output:
+  path: "out/evaluation-paper"
+  unique_id: evaluation-kitti-360-sscbench
+
+checkpoint: "<PATH-FEATURE-CHECKPOINT>.pt"
+
+evaluations:
+  - type: seg
+    agg_type: unsup_seg
+    args:
+      n_classes: 19
+      gt_classes: 19
+
+downstream:
+  input_dim: 768
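
The three evaluation configs above differ only in the dataset group and `output.unique_id`; each also overrides `downstream.input_dim` from the 384 set in configs/downstream/semantic.yaml to 768. Assuming Hydra/OmegaConf composition (which the `defaults` lists suggest), later values win the merge; a minimal OmegaConf sketch of that behavior:

```python
from omegaconf import OmegaConf

base = OmegaConf.create({"downstream": {"input_dim": 384, "mode": "3d"}})
override = OmegaConf.create({"downstream": {"input_dim": 768}})

merged = OmegaConf.merge(base, override)
print(merged.downstream.input_dim)  # 768 -- the override wins
print(merged.downstream.mode)       # 3d  -- untouched keys survive
```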
configs/model/dino_downsampler.yaml ADDED
@@ -0,0 +1,64 @@
+arch: "BTSNet"
+use_code: true
+prediction_mode: default
+
+predict_dino: true
+dino_dims: 64  # == encoder.pca_dino_out
+
+compensate_artifacts: true
+flip_augmentation: true
+
+encoder:
+  type: "dinov2"
+  mode: "downsample-prediction"  # upsample-gt, downsample-pred
+  decoder_arch: "dpt"
+  # upsampler_arch: "multiscale-crop"  # multiscale-crop, nearest
+  downsampler_arch: "featup"  # featup, bilinear
+  encoder_arch: "vit-b"  # vit-s, vit-b
+  version: "v1"  # v1, v2, reg, fit3d
+  separate_gt_version: "v1"  # v1, v2, reg, fit3d, None
+  encoder_freeze: false
+  flip_avg_gt: false
+  dim_reduction_arch: "mlp"
+  num_ch_enc: [64, 64, 128, 256]
+  intermediate_features: [3, 6, 9]
+  decoder_out_dim: 256
+  dino_pca_dim: 64  # == dino_dims
+  image_size: [192, 640]
+  key_features: false
+
+code:
+  num_freqs: 6
+  freq_factor: 1.5
+  include_input: true
+
+decoder_heads:
+  - type: "resnet"
+    name: "normal_head"
+    freeze: false
+    args:
+      n_blocks: 0
+      d_hidden: 128
+final_prediction_head: "normal_head"
+
+encoding_strategy:
+  name: "default"
+  args: {}
+eval_encoding_strategy:
+  name: "default"
+  args: null
+loss_renderer_strategy:
+  name: "kitti_360"
+  args: null
+eval_loss_renderer_strategy:
+  name: "single_renderer"
+  args:
+    shuffle_frames: false
+    all_frames: true
+
+inv_z: true
+
+learn_empty: false
+code_mode: z
+
+n_frames_render: 4  # number of frames to render among v==8
configs/model/dino_upsampler.yaml ADDED
@@ -0,0 +1,64 @@
+arch: "BTSNet"
+use_code: true
+prediction_mode: default
+
+predict_dino: true
+dino_dims: 64  # == encoder.pca_dino_out
+
+compensate_artifacts: false
+flip_augmentation: true
+
+encoder:
+  type: "dinov2"
+  mode: "upsample-gt"  # upsample-gt, downsample-pred
+  decoder_arch: "dpt"
+  upsampler_arch: "multiscale-crop"  # multiscale-crop, nearest
+  # downsampler_arch: "featup"  # featup, bilinear
+  encoder_arch: "vit-b"  # vit-s, vit-b
+  version: "v1"  # v1, v2, reg, fit3d
+  separate_gt_version: "v1"  # v1, v2, reg, fit3d, None
+  encoder_freeze: false
+  flip_avg_gt: false
+  dim_reduction_arch: "mlp"
+  num_ch_enc: [64, 64, 128, 256]
+  intermediate_features: [3, 6, 9]
+  decoder_out_dim: 256
+  dino_pca_dim: 64  # == dino_dims
+  image_size: [192, 640]
+  key_features: false
+
+code:
+  num_freqs: 6
+  freq_factor: 1.5
+  include_input: true
+
+decoder_heads:
+  - type: "resnet"
+    name: "normal_head"
+    freeze: false
+    args:
+      n_blocks: 0
+      d_hidden: 128
+final_prediction_head: "normal_head"
+
+encoding_strategy:
+  name: "default"
+  args: {}
+eval_encoding_strategy:
+  name: "default"
+  args: null
+loss_renderer_strategy:
+  name: "kitti_360"
+  args: null
+eval_loss_renderer_strategy:
+  name: "single_renderer"
+  args:
+    shuffle_frames: false
+    all_frames: true
+
+inv_z: true
+
+learn_empty: false
+code_mode: z
+
+n_frames_render: 4  # number of frames to render among v==8
configs/model/dinov2_downsampler.yaml ADDED
@@ -0,0 +1,64 @@
+arch: "BTSNet"
+use_code: true
+prediction_mode: default
+
+predict_dino: true
+dino_dims: 64  # == encoder.pca_dino_out
+
+compensate_artifacts: true
+flip_augmentation: true
+
+encoder:
+  type: "dinov2"
+  mode: "downsample-prediction"  # upsample-gt, downsample-pred
+  decoder_arch: "dpt"
+  # upsampler_arch: "multiscale-crop"  # multiscale-crop, nearest
+  downsampler_arch: "featup"  # featup, bilinear
+  encoder_arch: "vit-b"  # vit-s, vit-b
+  version: "v2"  # v1, v2, reg, fit3d
+  separate_gt_version: "v2"  # v1, v2, reg, fit3d, None
+  encoder_freeze: false
+  flip_avg_gt: false
+  dim_reduction_arch: "mlp"
+  num_ch_enc: [64, 64, 128, 256]
+  intermediate_features: [3, 6, 9]
+  decoder_out_dim: 256
+  dino_pca_dim: 64  # == dino_dims
+  image_size: [192, 640]
+  key_features: false
+
+code:
+  num_freqs: 6
+  freq_factor: 1.5
+  include_input: true
+
+decoder_heads:
+  - type: "resnet"
+    name: "normal_head"
+    freeze: false
+    args:
+      n_blocks: 0
+      d_hidden: 128
+final_prediction_head: "normal_head"
+
+encoding_strategy:
+  name: "default"
+  args: {}
+eval_encoding_strategy:
+  name: "default"
+  args: null
+loss_renderer_strategy:
+  name: "kitti_360"
+  args: null
+eval_loss_renderer_strategy:
+  name: "single_renderer"
+  args:
+    shuffle_frames: false
+    all_frames: true
+
+inv_z: true
+
+learn_empty: false
+code_mode: z
+
+n_frames_render: 4  # number of frames to render among v==8
configs/renderer/pixelnerf.yaml ADDED
@@ -0,0 +1,9 @@
+n_coarse : 32
+n_fine : 0
+n_fine_depth : 0
+depth_std : 1.0
+sched : []
+white_bkgd : false
+lindisp: true
+hard_alpha_cap: true
+eval_batch_size: 65536
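
These renderer settings parameterize NeRF-style volume rendering: `n_coarse: 32` samples per ray, `lindisp: true` to space samples linearly in disparity, and `hard_alpha_cap: true` to force the final sample opaque so each ray's weights sum to one. A minimal sketch of the compositing step such settings feed into (an illustration of the standard technique, not the repository's renderer):

```python
import torch

def composite_weights(sigma, z, hard_alpha_cap=True):
    """sigma, z: (n_rays, n_samples) densities and sample depths."""
    deltas = z[:, 1:] - z[:, :-1]
    deltas = torch.cat([deltas, 1e10 * torch.ones_like(deltas[:, :1])], dim=-1)
    alpha = 1.0 - torch.exp(-sigma * deltas)  # per-sample opacity
    if hard_alpha_cap:
        alpha = alpha.clone()
        alpha[:, -1] = 1.0                    # last sample absorbs the remainder
    # transmittance: probability the ray reaches each sample unoccluded
    trans = torch.cumprod(
        torch.cat([torch.ones_like(alpha[:, :1]), 1.0 - alpha[:, :-1]], dim=-1),
        dim=-1)
    return alpha * trans                      # rendering weights per sample
```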
configs/train_scenedino_kitti_360.yaml ADDED
@@ -0,0 +1,43 @@
+defaults:
+  - dataset: kitti_360_sscbench
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: scenedino
+  - validation: scenedino
+  - _self_
+
+training_type: "full_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: true
+name: "training"
+batch_size: 4
+num_workers: 4
+
+output:
+  path: "out/features-paper"
+  unique_id: scenedino-kitti-360-sscbench
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+training:
+  ray_sampler:
+    args:
+      patch_size: 8
+      # ray_batch_size: 512
+
+  scheduler:
+    step_size: 50000
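
The `defaults` list follows Hydra's composition syntax: the dataset, model, renderer, training, and validation groups are merged first, and `_self_` applies this file's overrides last (for example, `training.ray_sampler.args.patch_size` drops from the 16 in configs/training/scenedino.yaml to 8). Assuming the training entry point composes configs with Hydra (the entry point itself is not part of this diff), the result can be inspected like this:

```python
from hydra import compose, initialize

# Assumption: configs/ is the Hydra search path used by the training script.
with initialize(version_base=None, config_path="configs"):
    cfg = compose(config_name="train_scenedino_kitti_360")

print(cfg.renderer.n_coarse)                     # 32
print(cfg.training.ray_sampler.args.patch_size)  # 8 (this file's override)
```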
configs/train_scenedino_re10k.yaml ADDED
@@ -0,0 +1,49 @@
+defaults:
+  - dataset: realestate10k
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: scenedino
+  - validation: scenedino
+  - _self_
+
+training_type: "full_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: true
+name: "training"
+batch_size: 4
+num_workers: 4
+
+output:
+  path: "out/features-paper"
+  unique_id: scenedino-re10k
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+model:
+  encoder:
+    image_size: [288, 512]
+  loss_renderer_strategy:
+    name: "alternate"
+
+training:
+  ray_sampler:
+    args:
+      patch_size: 8
+      # ray_batch_size: 512
+
+  scheduler:
+    step_size: 50000
configs/train_semantic_kitti_360.yaml ADDED
@@ -0,0 +1,52 @@
+defaults:
+  - dataset: kitti_360_sscbench
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: semantic
+  - validation: semantic
+  - downstream: semantic
+  - _self_
+
+training_type: "downstream_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: true
+name: "training"
+batch_size: 4
+gradient_accum_factor: 1
+num_workers: 6
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+output:
+  path: "out/ssc-paper"
+  unique_id: ssc-kitti-360-sscbench
+
+training:
+  epoch_length: 1000
+  resume_from: "<PATH-FEATURE-CHECKPOINT>.pt"
+
+  optimizer:
+    args:
+      lr: 5e-4
+
+model:
+  sample_radius_3d: 0.5
+
+downstream:
+  input_dim: 768
+  mode: "3d"
+  # mlp_head: true
configs/training/loss/scenedino.yaml ADDED
@@ -0,0 +1,15 @@
+- type: reconstruction
+  coarse:
+    criterion: "l1+ssim"
+    dino_criterion: "cosine"
+  invalid_policy: weight_guided
+
+  reconstruct_dino: true
+  lambda_dino_coarse: 0.2
+  temperature_dino: 5
+
+  regularizations:
+    - type: edge_aware_smoothness
+      lambda: 0.001
+    - type: dino_edge_aware_smoothness
+      lambda: 0.25
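
Here `dino_criterion: "cosine"` compares rendered DINO features against the 2D teacher features by cosine similarity, weighted by `lambda_dino_coarse`. A simplified sketch of such a term (not the repository's exact loss, which also handles the invalid policy and temperature):

```python
import torch.nn.functional as F

def dino_cosine_loss(pred, target, lambda_dino_coarse=0.2):
    # pred, target: (..., dino_dims) rendered vs. teacher features
    cos_sim = F.cosine_similarity(pred, target, dim=-1)
    return lambda_dino_coarse * (1.0 - cos_sim).mean()
```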
configs/training/loss/semantic.yaml ADDED
@@ -0,0 +1,10 @@
+- type: stego
+
+  random_weight: 0.6702352279261414
+  knn_weight: 0.4156436438453117
+  self_weight: 0.08146997886146659
+  random_shift: 0.8709334888837256
+  knn_shift: 0.18458300726748128
+  self_shift: 0.43610463774158115
+
+  pointwise: false
configs/training/optimizer/scenedino.yaml ADDED
@@ -0,0 +1,7 @@
+type: "adam"
+args:
+  lr: 1e-4
+  betas: [0.9, 0.999]
+  eps: 1e-08
+  weight_decay: 0.0
+  amsgrad: false
configs/training/optimizer/semantic.yaml ADDED
@@ -0,0 +1,7 @@
+type: "adam"
+args:
+  lr: 5e-4
+  betas: [0.9, 0.999]
+  eps: 1e-08
+  weight_decay: 0.0
+  amsgrad: false
configs/training/scenedino.yaml ADDED
@@ -0,0 +1,21 @@
+defaults:
+  - optimizer: scenedino
+  - scheduler: scenedino
+  - loss: scenedino
+  - _self_
+
+num_epochs: 50
+continue: false
+
+checkpoint_every: 10000
+log_every_iters: 100
+
+ray_sampler:
+  z_near: 3
+  z_far: 80
+  sample_mode: "patch"
+  args:
+    patch_size: 16
+    ray_batch_size: 2048
+    snap_to_grid: true
+    dino_upscaled: false
configs/training/scheduler/scenedino.yaml ADDED
@@ -0,0 +1,3 @@
+type: step
+step_size: 100000
+gamma: 0.1
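
`type: step` with these arguments maps naturally onto PyTorch's `StepLR` (an assumption about the scheduler dispatch, not confirmed by this diff): the learning rate is multiplied by `gamma` every `step_size` steps.

```python
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=1e-4)  # lr from configs/training/optimizer/scenedino.yaml
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100_000, gamma=0.1)
```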
configs/training/semantic.yaml ADDED
@@ -0,0 +1,16 @@
+defaults:
+  - optimizer: semantic
+  - loss: semantic
+  - _self_
+
+num_epochs: 1
+epoch_length: 2500
+continue: false
+
+checkpoint_every: 5000
+log_every_iters: 250
+
+ray_sampler:
+  z_near: 3
+  z_far: 80
+  sample_mode: "image"
configs/validation/scenedino.yaml ADDED
@@ -0,0 +1,78 @@
+validation:
+  metrics:
+    - type: depth
+      args: null
+    - type: dino
+      args: null
+  subset:
+    type: range
+    args:
+      start: 0
+      end: 128
+  save_best:
+    metric: dino_cos_sim
+    sign: 1
+  log_loss: false
+  global_step:
+    type: "trainer iteration"
+  events:
+    # - type: STARTED
+    #   args: null
+    - type: ITERATION_COMPLETED
+      args:
+        every: 5000
+    # - type: EPOCH_COMPLETED
+    #   args:
+    #     every: 1
+    - type: COMPLETED
+      args: null
+
+visualization:
+  metrics:
+    - type: depth
+      args: null
+  subset:
+    type: range
+    args:
+      start: 200
+      end: 201
+  visualize:
+    input_imgs: null
+    reconstructed_imgs: null
+    reconstruction_rmse: null
+    dino_gt: null
+    reconstructed_dino: null
+    reconstructed_dino_downsampled: null
+    batch_dino_gt: null
+    batch_dino_artifacts: null
+    batch_dino_features_kmeans: null
+    batch_dino_gt_kmeans: null
+    batch_reconstructed_dino: null
+    batch_reconstructed_dino_downsampled: null
+    dino_downsampling_salience: null
+    dino_downsampling_weight: null
+    dino_downsampling_per_patch_weight: null
+    dino_cos_sim_downsampled: null
+    depth: null
+    depth_profile: null
+    alpha_sum: null
+    ray_entropy: null
+    ray_entropy_weights: null
+    invalids: null
+    rendered_flow: null
+    predicted_occlusions: null
+    uncertainty: null
+  log_loss: false
+  global_step:
+    type: "trainer iteration"
+  events:
+    - type: STARTED
+      args: null
+    - type: ITERATION_COMPLETED
+      args:
+        every: 5000
+    # - type: EPOCH_COMPLETED
+    #   args:
+    #     every: 1
+    - type: COMPLETED
+      args: null
configs/validation/semantic.yaml ADDED
@@ -0,0 +1,80 @@
+validation:
+  metrics:
+    - type: seg
+      agg_type: unsup_seg
+      args:
+        n_classes: 19
+        gt_classes: 19
+    - type: stego
+      agg_type: concat
+  subset:
+    type: random
+    args:
+      size: 32
+  save_best:
+    metric: "stego_cluster_weighted_miou"
+    update_model: true
+  dry_run: false
+  log_loss: false
+  global_step:
+    type: "trainer iteration"
+  events:
+    # - type: STARTED
+    #   args: null
+    - type: ITERATION_COMPLETED
+      args:
+        every: 100
+    # - type: EPOCH_COMPLETED
+    #   args:
+    #     every: 1
+    # - type: COMPLETED
+    #   args: null
+
+visualization_seg:
+  metrics: {}
+  subset:
+    type: range
+    args:
+      start: 300
+      end: 301
+  visualize:
+    input_imgs: null
+    # reconstructed_imgs: null
+    # reconstruction_rmse: null
+    dino_gt: null
+    # reconstructed_dino: null
+    # reconstructed_dino_downsampled: null
+    batch_dino_gt: null
+    batch_dino_artifacts: null
+    segs_gt: null
+    segs_pred: null
+    batch_reconstructed_dino: null
+    batch_dino_features_kmeans: null
+    # batch_reconstructed_dino_downsampled: null
+    # dino_downsampling_salience: null
+    # dino_downsampling_weight: null
+    # dino_downsampling_per_patch_weight: null
+    # dino_cos_sim_downsampled: null
+    depth: null
+    # depth_profile: null
+    # alpha_sum: null
+    # ray_entropy: null
+    # ray_entropy_weights: null
+    # invalids: null
+    # rendered_flow: null
+    # predicted_occlusions: null
+    # uncertainty: null
+  log_loss: false
+  global_step:
+    type: "trainer iteration"
+  events:
+    - type: STARTED
+      args: null
+    - type: ITERATION_COMPLETED
+      args:
+        every: 100
+    # - type: EPOCH_COMPLETED
+    #   args:
+    #     every: 1
+    # - type: COMPLETED
+    #   args: null
datasets/__init__.py ADDED
File without changes
datasets/bdd/bdd_dataset.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ import time
4
+ import torch
5
+ import os
6
+
7
+ from PIL import Image
8
+
9
+ from torchvision import transforms
10
+ from torch.utils.data import Dataset
11
+
12
+ from collections import namedtuple
13
+ from datasets.kitti_360.labels import trainId2label
14
+
15
+
16
+ Label = namedtuple(
17
+ "Label",
18
+ [
19
+ "name",
20
+ "id",
21
+ "trainId",
22
+ "category",
23
+ "categoryId",
24
+ "hasInstances",
25
+ "ignoreInEval",
26
+ "color",
27
+ "to_cs27",
28
+ ],
29
+ )
30
+
31
+ BDD_LABEL = [
32
+ Label("unlabeled", 0, 255, "void", 0, False, True, (0, 0, 0), 255),
33
+ Label("dynamic", 1, 255, "void", 0, False, True, (111, 74, 0), 255),
34
+ Label("ego vehicle", 2, 255, "void", 0, False, True, (0, 0, 0), 255),
35
+ Label("ground", 3, 255, "void", 0, False, True, (81, 0, 81), 255),
36
+ Label("static", 4, 255, "void", 0, False, True, (0, 0, 0), 255),
37
+ Label("parking", 5, 255, "flat", 1, False, True, (250, 170, 160), 2),
38
+ Label("rail track", 6, 255, "flat", 1, False, True, (230, 150, 140), 3),
39
+ Label("road", 7, 0, "flat", 1, False, False, (128, 64, 128), 0),
40
+ Label("sidewalk", 8, 1, "flat", 1, False, False, (244, 35, 232), 1),
41
+ Label("bridge", 9, 255, "construction", 2, False, True, (150, 100, 100), 8),
42
+ Label("building", 10, 2, "construction", 2, False, False, (70, 70, 70), 4),
43
+ Label("fence", 11, 4, "construction", 2, False, False, (190, 153, 153), 6),
44
+ Label("garage", 12, 255, "construction", 2, False, True, (180, 100, 180), 255),
45
+ Label("guard rail", 13, 255, "construction", 2, False, True, (180, 165, 180), 7),
46
+ Label("tunnel", 14, 255, "construction", 2, False, True, (150, 120, 90), 9),
47
+ Label("wall", 15, 3, "construction", 2, False, False, (102, 102, 156), 5),
48
+ Label("banner", 16, 255, "object", 3, False, True, (250, 170, 100), 255),
49
+ Label("billboard", 17, 255, "object", 3, False, True, (220, 220, 250), 255),
50
+ Label("lane divider", 18, 255, "object", 3, False, True, (255, 165, 0), 255),
51
+ Label("parking sign", 19, 255, "object", 3, False, False, (220, 20, 60), 255),
52
+ Label("pole", 20, 5, "object", 3, False, False, (153, 153, 153), 10),
53
+ Label("polegroup", 21, 255, "object", 3, False, True, (153, 153, 153), 11),
54
+ Label("street light", 22, 255, "object", 3, False, True, (220, 220, 100), 255),
55
+ Label("traffic cone", 23, 255, "object", 3, False, True, (255, 70, 0), 255),
56
+ Label("traffic device", 24, 255, "object", 3, False, True, (220, 220, 220), 255),
57
+ Label("traffic light", 25, 6, "object", 3, False, False, (250, 170, 30), 12),
58
+ Label("traffic sign", 26, 7, "object", 3, False, False, (220, 220, 0), 13),
59
+ Label("traffic sign frame", 27, 255, "object", 3, False, True, (250, 170, 250), 255),
60
+ Label("terrain", 28, 9, "nature", 4, False, False, (152, 251, 152), 15),
61
+ Label("vegetation", 29, 8, "nature", 4, False, False, (107, 142, 35), 14),
62
+ Label("sky", 30, 10, "sky", 5, False, False, (70, 130, 180), 16),
63
+ Label("person", 31, 11, "human", 6, True, False, (220, 20, 60), 17),
64
+ Label("rider", 32, 12, "human", 6, True, False, (255, 0, 0), 18),
65
+ Label("bicycle", 33, 18, "vehicle", 7, True, False, (119, 11, 32), 26),
66
+ Label("bus", 34, 15, "vehicle", 7, True, False, (0, 60, 100), 21),
67
+ Label("car", 35, 13, "vehicle", 7, True, False, (0, 0, 142), 19),
68
+ Label("caravan", 36, 255, "vehicle", 7, True, True, (0, 0, 90), 22),
69
+ Label("motorcycle", 37, 17, "vehicle", 7, True, False, (0, 0, 230), 25),
70
+ Label("trailer", 38, 255, "vehicle", 7, True, True, (0, 0, 110), 23),
71
+ Label("train", 39, 16, "vehicle", 7, True, False, (0, 80, 100), 24),
72
+ Label("truck", 40, 14, "vehicle", 7, True, False, (0, 0, 70), 20),
73
+ ]
74
+
75
+
76
+ def resize_with_padding(img, target_size, padding_value, interpolation):
77
+ target_h, target_w = target_size
78
+ width, height = img.size
79
+ aspect = width / height
80
+
81
+ if aspect > (target_w / target_h):
82
+ new_w = target_w
83
+ new_h = int(target_w / aspect)
84
+ else:
85
+ new_h = target_h
86
+ new_w = int(target_h * aspect)
87
+
88
+ img = transforms.functional.resize(img, (new_h, new_w), interpolation)
89
+
90
+ pad_h = target_h - new_h
91
+ pad_w = target_w - new_w
92
+ padding = (pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2)
93
+
94
+ return transforms.functional.pad(img, padding, fill=padding_value)
95
+
96
+
97
+ class BDDSeg(Dataset):
98
+ def __init__(self, root, image_set, image_size=(192, 640)):
99
+ super(BDDSeg, self).__init__()
100
+ self.split = image_set
101
+ self.root = root
102
+
103
+ self.image_transform = transforms.Compose([
104
+ #transforms.Lambda(lambda img: resize_with_padding(img, image_size, padding_value=0, interpolation=transforms.InterpolationMode.BILINEAR)),
105
+
106
+ transforms.Resize((320, 640), interpolation=transforms.InterpolationMode.BILINEAR),
107
+ transforms.CenterCrop(image_size),
108
+ transforms.ToTensor(),
109
+ ])
110
+
111
+ self.target_transform = transforms.Compose([
112
+ #transforms.Lambda(lambda img: resize_with_padding(img, image_size, padding_value=-1, interpolation=transforms.InterpolationMode.NEAREST)),
113
+
114
+ transforms.Resize((320, 640), interpolation=transforms.InterpolationMode.NEAREST),
115
+ transforms.CenterCrop(image_size),
116
+ transforms.PILToTensor(),
117
+ transforms.Lambda(lambda x: x.long()),
118
+ ])
119
+
120
+ self.images, self.targets = [], []
121
+
122
+ image_dir = os.path.join(self.root, "images/10k", self.split)
123
+ target_dir = os.path.join(self.root, "labels/pan_seg/bitmasks", self.split)
124
+ for file_name in os.listdir(image_dir):
125
+ image_path = os.path.join(image_dir, file_name)
126
+
127
+ target_filename = os.path.splitext(file_name)[0] + ".png"
128
+ target_path = os.path.join(target_dir, target_filename)
129
+ assert os.path.isfile(target_path)
130
+
131
+ self.images.append(image_path)
132
+ self.targets.append(target_path)
133
+
134
+ self.class_mapping = torch.Tensor([trainId2label[c.trainId].id for c in BDD_LABEL]).int()
135
+
136
+ def __getitem__(self, index):
137
+ _start_time = time.time()
138
+
139
+ image = Image.open(self.images[index]).convert("RGB")
140
+ target = Image.open(self.targets[index])
141
+
142
+ image = self.image_transform(image)
143
+ target = self.target_transform(target)
144
+
145
+ image = 2.0 * image - 1.0
146
+ poses = torch.eye(4) # (4, 4)
147
+ projs = torch.eye(3) # (3, 3)
148
+ target = target[0] # ("instance", "semantic", "polygon", "color")
149
+ target = self.class_mapping[target]
150
+
151
+ _proc_time = time.time() - _start_time
152
+
153
+ data = {
154
+ "imgs": [image.numpy()],
155
+ "poses": [poses.numpy()],
156
+ "projs": [projs.numpy()],
157
+ "segs": [target.numpy()],
158
+ "t__get_item__": np.array([_proc_time]),
159
+ "index": [np.array([index])],
160
+ }
161
+ return data
162
+
163
+ def __len__(self):
164
+ return len(self.images)
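For reference, a minimal sketch (not part of the diff) of what the commented-out `resize_with_padding` path above computes; the input resolution is illustrative:

    from PIL import Image
    from torchvision import transforms

    from datasets.bdd.bdd_dataset import resize_with_padding

    # A 1280x720 frame has aspect 1.78, below the target aspect 640/192 = 3.33,
    # so it is fitted to the target height and padded left/right.
    img = Image.new("RGB", (1280, 720))
    out = resize_with_padding(img, (192, 640), padding_value=0,
                              interpolation=transforms.InterpolationMode.BILINEAR)
    print(out.size)  # (640, 192): resized to 341x192, then padded by 149/150 px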
datasets/cityscapes/cityscapes_dataset.py ADDED
@@ -0,0 +1,82 @@
+ import numpy as np
+ import time
+ import torch
+
+ from torchvision import transforms
+ from torchvision.datasets.cityscapes import Cityscapes
+ from torch.utils.data import Dataset
+
+
+ def resize_with_padding(img, target_size, padding_value, interpolation):
+     target_h, target_w = target_size
+     width, height = img.size
+     aspect = width / height
+
+     if aspect > (target_w / target_h):
+         new_w = target_w
+         new_h = int(target_w / aspect)
+     else:
+         new_h = target_h
+         new_w = int(target_h * aspect)
+
+     img = transforms.functional.resize(img, (new_h, new_w), interpolation)
+
+     pad_h = target_h - new_h
+     pad_w = target_w - new_w
+     padding = (pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2)
+
+     return transforms.functional.pad(img, padding, fill=padding_value)
+
+
+ class CityscapesSeg(Dataset):
+     def __init__(self, root, image_set, image_size=(192, 640)):
+         super(CityscapesSeg, self).__init__()
+         self.split = image_set
+         self.root = root
+
+         transform = transforms.Compose([
+             # transforms.Lambda(lambda img: resize_with_padding(img, image_size, padding_value=0, interpolation=transforms.InterpolationMode.BILINEAR)),
+             transforms.Resize((320, 640), interpolation=transforms.InterpolationMode.BILINEAR),
+             transforms.CenterCrop(image_size),
+             transforms.ToTensor(),
+         ])
+
+         target_transform = transforms.Compose([
+             # transforms.Lambda(lambda img: resize_with_padding(img, image_size, padding_value=-1, interpolation=transforms.InterpolationMode.NEAREST)),
+             transforms.Resize((320, 640), interpolation=transforms.InterpolationMode.NEAREST),
+             transforms.CenterCrop(image_size),
+             transforms.PILToTensor(),
+             transforms.Lambda(lambda x: x.long()),
+         ])
+
+         self.inner_loader = Cityscapes(self.root, image_set,
+                                        mode="fine",
+                                        target_type="semantic",
+                                        transform=transform,
+                                        target_transform=target_transform)
+
+     def __getitem__(self, index):
+         _start_time = time.time()
+         image, target = self.inner_loader[index]  # (3, h, w) / (1, h, w)
+
+         image = 2.0 * image - 1.0
+         poses = torch.eye(4)  # (4, 4)
+         projs = torch.eye(3)  # (3, 3)
+         target = target.squeeze(0)  # (h, w)
+
+         _proc_time = time.time() - _start_time
+
+         data = {
+             "imgs": [image.numpy()],
+             "poses": [poses.numpy()],
+             "projs": [projs.numpy()],
+             "segs": [target.numpy()],
+             "t__get_item__": np.array([_proc_time]),
+             "index": [np.array([index])],
+         }
+         return data
+
+     def __len__(self):
+         return len(self.inner_loader)
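A quick usage sketch (the root path and batch size are placeholders, not values from this repo); with the default collate, each batch entry is a single-element list holding the stacked tensors:

    from torch.utils.data import DataLoader

    from datasets.cityscapes.cityscapes_dataset import CityscapesSeg

    dataset = CityscapesSeg(root="/path/to/cityscapes", image_set="val")
    loader = DataLoader(dataset, batch_size=4, shuffle=False)

    batch = next(iter(loader))
    print(batch["imgs"][0].shape)  # torch.Size([4, 3, 192, 640]), values in [-1, 1]
    print(batch["segs"][0].shape)  # torch.Size([4, 192, 640]), Cityscapes label ids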
datasets/data_util.py ADDED
@@ -0,0 +1,307 @@
+ import os
+
+ from datasets.kitti_360.kitti_360_dataset import Kitti360Dataset
+ from datasets.kitti_odom.kitti_odometry_dataset import KittiOdometryDataset
+ from datasets.kitti_raw.kitti_raw_dataset import KittiRawDataset
+ from datasets.nyu_depth_v2.nyu_depth_v2_dataset import NYUDepthV2Dataset
+ from datasets.realestate10k.realestate10k_dataset import RealEstate10kDataset
+ from datasets.waymo.waymo_dataset import WaymoDataset
+
+
+ def make_datasets(config):
+     type = config.get("type", "KITTI_Raw")
+     if type == "KITTI_Odometry":
+         train_dataset = KittiOdometryDataset(
+             base_path=config["data_path"],
+             frame_count=config.get("data_fc", 1),
+             target_image_size=config.get("image_size", (128, 256)),
+             return_stereo=config.get("data_stereo", False),
+             sequences=config.get("train_sequences", ("00",)),
+             custom_pose_path=config.get("custom_pose_path", None),
+             keyframe_offset=0,  # -(config.get("data_fc", 1) // 2)
+         )
+         test_dataset = KittiOdometryDataset(
+             base_path=config["data_path"],
+             frame_count=config.get("data_fc", 1),
+             target_image_size=config.get("image_size", (128, 256)),
+             return_stereo=config.get("data_stereo", False),
+             sequences=config.get("val_sequences", ("00",)),
+             custom_pose_path=config.get("custom_pose_path", None),
+             keyframe_offset=0,  # -(config.get("data_fc", 1) // 2)
+         )
+         return train_dataset, test_dataset
+
+     elif type == "KITTI_Raw":
+         train_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "train_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+             color_aug=config.get("color_aug", False),
+         )
+         test_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "val_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+         )
+         return train_dataset, test_dataset
+
+     elif type == "KITTI_360":
+         if config.get("split_path", None) is None:
+             train_split_path = None
+             test_split_path = None
+         else:
+             train_split_path = os.path.join(config["split_path"], "train_files.txt")
+             test_split_path = os.path.join(config["split_path"], "val_files.txt")
+
+         train_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=train_split_path,
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 3),
+             return_stereo=config.get("data_stereo", True),
+             return_fisheye=config.get("data_fisheye", True),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", 1),
+             color_aug=config.get("color_aug", False),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         test_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=test_split_path,
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 3),
+             return_stereo=config.get("data_stereo", True),
+             return_fisheye=config.get("data_fisheye", True),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", 1),
+             dilation=config.get("dilation", 1),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         return train_dataset, test_dataset
+
+     elif type == "RealEstate10k":
+         train_dataset = RealEstate10kDataset(
+             data_path=config["data_path"],
+             split_path=None,
+             target_image_size=config.get("image_size", (256, 384)),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=0,  # -(config.get("data_fc", 1) // 2)
+             dilation=config.get("dilation", 10),
+             color_aug=config.get("color_aug", False),
+         )
+         test_dataset = RealEstate10kDataset(
+             data_path=config["data_path"],
+             split_path=os.path.join(config["split_path"], "val_files.txt"),
+             target_image_size=config.get("image_size", (256, 384)),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=0,  # -(config.get("data_fc", 1) // 2)
+             dilation=config.get("dilation", 10),
+             color_aug=False,
+         )
+         return train_dataset, test_dataset
+
+     elif type == "Waymo":
+         if config.get("split_path", None) is None:
+             train_split_path = None
+             test_split_path = None
+         else:
+             train_split_path = os.path.join(config["split_path"], "train_files.txt")
+             test_split_path = os.path.join(config["split_path"], "val_files.txt")
+
+         train_dataset = WaymoDataset(
+             data_path=config["data_path"],
+             mode="training",
+             split_path=train_split_path,
+             target_image_size=tuple(config.get("image_size", (320, 480))),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             return_45=config.get("return_45", True),
+             return_90=config.get("return_90", True),
+             offset_45=config.get("offset_45", 5),
+             offset_90=config.get("offset_90", 10),
+             dilation=config.get("dilation", 1),
+             color_aug=config.get("color_aug", True),
+             correct_exposure=config.get("correct_exposure", True),
+         )
+         test_dataset = WaymoDataset(
+             data_path=config["data_path"],
+             mode="validation",
+             split_path=test_split_path,
+             target_image_size=tuple(config.get("image_size", (320, 480))),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             return_45=config.get("return_45", True),
+             return_90=config.get("return_90", True),
+             offset_45=config.get("offset_45", 5),
+             offset_90=config.get("offset_90", 10),
+             dilation=config.get("dilation", 1),
+             color_aug=False,
+             return_depth=True,
+             correct_exposure=config.get("correct_exposure", True),
+         )
+         return train_dataset, test_dataset
+
+     elif type == "KITTI_Raw_DFT":
+         train_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "train_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+             color_aug=config.get("color_aug", False),
+         )
+         test_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "val_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+         )
+         return train_dataset, test_dataset
+
+     elif type == "KITTI_360_DFT":
+         if config.get("split_path", None) is None:
+             train_split_path = None
+             test_split_path = None
+         else:
+             train_split_path = os.path.join(config["split_path"], "train_files.txt")
+             test_split_path = os.path.join(config["split_path"], "val_files.txt")
+
+         train_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=train_split_path,
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 3),
+             return_stereo=config.get("data_stereo", True),
+             return_fisheye=config.get("data_fisheye", True),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", 1),
+             stereo_offset=config.get("stereo_offset", 1),
+             color_aug=config.get("color_aug", False),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         test_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=test_split_path,
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 3),
+             return_stereo=config.get("data_stereo", True),
+             return_fisheye=config.get("data_fisheye", True),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", [10])[0],  # takes the first offset, so all splits use the same value
+             stereo_offset=config.get("stereo_offset", [1])[0],  # keeps evaluation consistent with test and viz
+             dilation=config.get("dilation", 1),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         return train_dataset, test_dataset
+
+     else:
+         raise NotImplementedError(f"Unsupported dataset type: {type}")
+
+
+ def make_test_dataset(config):
+     type = config.get("type", "KITTI_Raw")
+     if type == "KITTI_Raw":
+         test_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "test_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             return_depth=True,
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=0,
+         )
+         return test_dataset
+     elif type == "KITTI_360":
+         test_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config.get("split_path", None), "test_files.txt"),
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             return_fisheye=config.get("data_fisheye", False),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=0,
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", 1),
+             dilation=config.get("dilation", 1),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         return test_dataset
+     elif type == "KITTI_360_DFT":
+         test_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config.get("split_path", None), "test_files.txt"),
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             return_fisheye=config.get("data_fisheye", False),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=0,
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", [10])[0],  # takes the first offset, so all splits use the same value
+             stereo_offset=config.get("stereo_offset", [1])[0],  # keeps evaluation consistent with test and viz
+             dilation=config.get("dilation", 1),
+             is_preprocessed=config.get("is_preprocessed", False),
+             return_depth=True,
+         )
+         return test_dataset
+     elif type == "RealEstate10k":
+         test_dataset = RealEstate10kDataset(
+             data_path=config["data_path"],
+             split_path=os.path.join(config["split_path"], "test_files.txt"),
+             target_image_size=config.get("image_size", (256, 384)),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=0,
+             dilation=config.get("dilation", 10),
+             color_aug=False,
+         )
+         return test_dataset
+     elif type == "NYU_Depth_V2":
+         test_dataset = NYUDepthV2Dataset(
+             data_path=config["data_path"],
+             target_image_size=config.get("image_size", (256, 384)),
+         )
+         return test_dataset
+     else:
+         raise NotImplementedError(f"Unsupported dataset type: {type}")
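To illustrate the keys these factories read, a minimal KITTI-360 sketch; the paths are placeholders, and every omitted key falls back to its `.get()` default above:

    from datasets.data_util import make_datasets

    config = {
        "type": "KITTI_360",
        "data_path": "/path/to/KITTI-360",
        "pose_path": "/path/to/KITTI-360/data_poses",
        "split_path": "datasets/kitti_360/splits/seg",
        "image_size": (192, 640),
        "data_fc": 3,  # frame count per sample
    }
    train_dataset, test_dataset = make_datasets(config)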
datasets/kitti_360/__init__.py ADDED
File without changes
datasets/kitti_360/annotation.py ADDED
@@ -0,0 +1,538 @@
+ #!/usr/bin/python
+ #
+
+ from __future__ import print_function, absolute_import, division
+
+ import glob
+ import json
+ import os
+ import struct
+ import xml.etree.ElementTree as ET
+ from collections import defaultdict
+ from collections import namedtuple
+
+ import numpy as np
+ from matplotlib import cm
+ from skimage import io, filters
+
+ # A point in a polygon
+ Point = namedtuple('Point', ['x', 'y'])
+
+
+ from abc import ABCMeta
+ from datasets.kitti_360.labels import labels, id2label, kittiId2label, name2label
+
+ MAX_N = 1000
+
+
+ def local2global(semanticId, instanceId):
+     globalId = semanticId * MAX_N + instanceId
+     if isinstance(globalId, np.ndarray):
+         return globalId.astype(int)  # np.int was removed in NumPy >= 1.24
+     else:
+         return int(globalId)
+
+
+ def global2local(globalId):
+     semanticId = globalId // MAX_N
+     instanceId = globalId % MAX_N
+     if isinstance(globalId, np.ndarray):
+         return semanticId.astype(int), instanceId.astype(int)
+     else:
+         return int(semanticId), int(instanceId)
+
+
+ annotation2global = defaultdict()
+
+
+ # Abstract base class for annotation objects
+ class KITTI360Object:
+     __metaclass__ = ABCMeta
+
+     def __init__(self):
+         # the label
+         self.label = ""
+
+         # colormap
+         self.cmap = cm.get_cmap('Set1')
+         self.cmap_length = 9
+
+     def getColor(self, idx):
+         if idx == 0:
+             return np.array([0, 0, 0])
+         return np.asarray(self.cmap(idx % self.cmap_length)[:3]) * 255.
+
+     def assignColor(self):
+         if self.semanticId >= 0:
+             self.semanticColor = id2label[self.semanticId].color
+         if self.instanceId > 0:
+             self.instanceColor = self.getColor(self.instanceId)
+         else:
+             self.instanceColor = self.semanticColor
+
+
+ # Class that contains the information of a single annotated object as 3D bounding box
+ class KITTI360Bbox3D(KITTI360Object):
+     # Constructor
+     def __init__(self):
+         KITTI360Object.__init__(self)
+         # the polygon as list of points
+         self.vertices = []
+         self.faces = []
+         self.lines = [[0, 5], [1, 4], [2, 7], [3, 6],
+                       [0, 1], [1, 3], [3, 2], [2, 0],
+                       [4, 5], [5, 7], [7, 6], [6, 4]]
+
+         # the ID of the corresponding object
+         self.semanticId = -1
+         self.instanceId = -1
+         self.annotationId = -1
+
+         # the window that contains the bbox
+         self.start_frame = -1
+         self.end_frame = -1
+
+         # timestamp of the bbox (-1 if static)
+         self.timestamp = -1
+
+         # projected vertices
+         self.vertices_proj = None
+         self.meshes = []
+
+         # name
+         self.name = ''
+
+     def __str__(self):
+         return self.name
+
+     def generateMeshes(self):
+         self.meshes = []
+         if self.vertices_proj:
+             for fidx in range(self.faces.shape[0]):
+                 self.meshes.append([Point(self.vertices_proj[0][int(x)], self.vertices_proj[1][int(x)]) for x in self.faces[fidx]])
+
+     def parseOpencvMatrix(self, node):
+         rows = int(node.find('rows').text)
+         cols = int(node.find('cols').text)
+         data = node.find('data').text.split(' ')
+
+         mat = []
+         for d in data:
+             d = d.replace('\n', '')
+             if len(d) < 1:
+                 continue
+             mat.append(float(d))
+         mat = np.reshape(mat, [rows, cols])
+         return mat
+
+     def parseVertices(self, child):
+         transform = self.parseOpencvMatrix(child.find('transform'))
+         R = transform[:3, :3]
+         T = transform[:3, 3]
+         vertices = self.parseOpencvMatrix(child.find('vertices'))
+         faces = self.parseOpencvMatrix(child.find('faces'))
+
+         vertices = np.matmul(R, vertices.transpose()).transpose() + T
+         self.vertices = vertices
+         self.faces = faces
+         self.R = R
+         self.T = T
+
+     def parseBbox(self, child):
+         semanticIdKITTI = int(child.find('semanticId').text)
+         self.semanticId = kittiId2label[semanticIdKITTI].id
+         self.instanceId = int(child.find('instanceId').text)
+         self.name = kittiId2label[semanticIdKITTI].name
+
+         self.start_frame = int(child.find('start_frame').text)
+         self.end_frame = int(child.find('end_frame').text)
+
+         self.timestamp = int(child.find('timestamp').text)
+
+         self.annotationId = int(child.find('index').text) + 1
+
+         global annotation2global
+         annotation2global[self.annotationId] = local2global(self.semanticId, self.instanceId)
+         self.parseVertices(child)
+
+     def parseStuff(self, child):
+         classmap = {'driveway': 'parking', 'ground': 'terrain', 'unknownGround': 'ground',
+                     'railtrack': 'rail track', 'bigPole': 'pole', 'unknownObject': 'unknown object',
+                     'smallPole': 'smallpole', 'trafficSign': 'traffic sign', 'trashbin': 'trash bin',
+                     'guardrail': 'guard rail', 'trafficLight': 'traffic light', 'pedestrian': 'person',
+                     'vendingmachine': 'vending machine', 'unknownConstruction': 'unknown construction',
+                     'unknownVehicle': 'unknown vehicle'}
+         label = child.find('label').text
+         if label in classmap.keys():
+             label = classmap[label]
+
+         self.start_frame = int(child.find('start_frame').text)
+         self.end_frame = int(child.find('end_frame').text)
+
+         self.timestamp = int(child.find('timestamp').text)
+
+         self.semanticId = name2label[label].id
+         self.name = label
+         self.parseVertices(child)
+
+
+ # Class that contains the information of the point cloud of a single frame
+ class KITTI360Point3D(KITTI360Object):
+     # Constructor
+     def __init__(self):
+         KITTI360Object.__init__(self)
+
+         self.vertices = []
+
+         self.vertices_proj = None
+
+         # the ID of the corresponding object
+         self.semanticId = -1
+         self.instanceId = -1
+         self.annotationId = -1
+
+         # name
+         self.name = ''
+
+         # color
+         self.semanticColor = None
+         self.instanceColor = None
+
+     def __str__(self):
+         return self.name
+
+     def generateMeshes(self):
+         pass
+
+
+ # The annotation of a whole image, including semantic and instance
+ class Annotation2D:
+     # Constructor
+     def __init__(self, colormap='Set1'):
+         # the width of that image and thus of the label image
+         self.imgWidth = 0
+         # the height of that image and thus of the label image
+         self.imgHeight = 0
+
+         self.instanceId = None
+         self.semanticId = None
+         self.instanceImg = None
+         self.semanticImg = None
+
+         # savedId = semanticId*N + instanceId
+         self.N = 1000
+
+         # colormap
+         self.cmap = cm.get_cmap(colormap)
+
+         if colormap == 'Set1':
+             self.cmap_length = 9
+         else:
+             raise ValueError("Colormap length needs to be specified!")
+
+     def getColor(self, idx):
+         if idx == 0:
+             return np.array([0, 0, 0])
+         return np.asarray(self.cmap(idx % self.cmap_length)[:3]) * 255.
+
+     # Load confidence map
+     def loadConfidence(self, imgPath):
+         self.confidenceMap = io.imread(imgPath)
+         self.confidenceMap = np.asarray(self.confidenceMap).astype(float) / 255.
+
+     # Load instance id
+     def loadInstance(self, imgPath, gtType='instance', toImg=True, contourType='instance', semanticCt=True, instanceCt=True):
+         instanceId = io.imread(imgPath)
+         self.instanceId = np.asarray(instanceId % self.N)
+         self.semanticId = np.asarray(instanceId // self.N)
+
+         if not toImg:
+             return
+
+         if gtType == 'semantic':
+             self.toSemanticImage()
+
+         elif gtType == 'instance':
+             self.toInstanceImage()
+
+         if semanticCt or instanceCt:
+             self.getBoundary()
+
+         if gtType == 'semantic' and semanticCt:
+             boundaryImg = self.toBoundaryImage(contourType=contourType, instanceOnly=False)
+             self.semanticImg = self.semanticImg * (1 - boundaryImg) + \
+                 np.ones_like(self.semanticImg) * boundaryImg * 255
+
+         if gtType == 'instance' and instanceCt:
+             boundaryImg = self.toBoundaryImage(contourType=contourType, instanceOnly=True)
+             self.instanceImg = self.instanceImg * (1 - boundaryImg) + \
+                 np.ones_like(self.instanceImg) * boundaryImg * 255
+
+     def toSemanticImage(self):
+         self.semanticImg = np.zeros((self.semanticId.size, 3))
+         for label in labels:
+             mask = self.semanticId == label.id
+             mask = mask.flatten()
+             self.semanticImg[mask] = np.asarray(label.color)
+         self.semanticImg = self.semanticImg.reshape(*self.semanticId.shape, 3)
+
+     def toInstanceImage(self):
+         self.instanceImg = np.zeros((self.instanceId.size, 3))
+
+         uniqueId = np.unique(self.instanceId)
+         for uid in uniqueId:
+             mask = self.instanceId == uid
+             mask = mask.flatten()
+             self.instanceImg[mask] = np.asarray(self.getColor(uid))
+
+         self.instanceImg = self.instanceImg.reshape(*self.instanceId.shape, 3)
+
+     def getBoundary(self):
+         # semantic contours
+         uniqueId = np.unique(self.semanticId)
+         self.semanticContours = {}
+         for uid in uniqueId:
+             mask = (self.semanticId == uid).astype(np.uint8) * 255
+             mask_filter = filters.laplace(mask)
+             self.semanticContours[uid] = np.expand_dims(np.abs(mask_filter) > 0, 2)
+
+         # instance contours
+         globalId = local2global(self.semanticId, self.instanceId)
+         uniqueId = np.unique(globalId)
+         self.instanceContours = {}
+         for uid in uniqueId:
+             mask = (globalId == uid).astype(np.uint8) * 255
+             mask_filter = filters.laplace(mask)
+             self.instanceContours[uid] = np.expand_dims(np.abs(mask_filter) > 0, 2)
+
+     def toBoundaryImage(self, contourType='instance', instanceOnly=True):
+         if contourType == 'semantic':
+             contours = self.semanticContours
+             assert instanceOnly == False
+         elif contourType == 'instance':
+             contours = self.instanceContours
+         else:
+             raise ValueError("Contour type can only be 'semantic' or 'instance'!")
+
+         if not instanceOnly:
+             boundaryImg = [contours[k] for k in contours.keys()]
+         else:
+             boundaryImg = [contours[k] for k in contours.keys() if global2local(k)[1] != 0]
+         boundaryImg = np.sum(np.asarray(boundaryImg), axis=0)
+         boundaryImg = boundaryImg > 0
+         return boundaryImg
+
+
+ class Annotation2DInstance:
+     def __init__(self, gtPath, cam=0):
+
+         # trace the instances in all images
+         self.instanceDict = defaultdict(list)
+
+         instanceDictCached = os.path.join(gtPath, 'instanceDict.json')
+         print(instanceDictCached)
+         if os.path.isfile(instanceDictCached) and os.path.getsize(instanceDictCached) > 0:
+             cachedDict = json.load(open(instanceDictCached))
+             for k, v in cachedDict.items():
+                 self.instanceDict[int(k)] = v
+             return
+
+         obj = Annotation2D()
+
+         gtPaths = glob.glob(os.path.join(gtPath, 'instance', '*.png'))
+         print(f'Found {len(gtPaths)} label images...')
+
+         for i, imgPath in enumerate(gtPaths):
+             if i % 1000 == 0:
+                 print(f'Processed {i}/{len(gtPaths)} label images...')
+             obj.loadInstance(imgPath, toImg=False)
+             globalId = local2global(obj.semanticId, obj.instanceId)
+             globalIdUnique = np.unique(globalId)
+             for idx in globalIdUnique:
+                 self.instanceDict[int(idx)].append(os.path.basename(imgPath))
+
+         json.dump(self.instanceDict, open(instanceDictCached, 'w'))
+
+     # returns the paths that contain the specific instance
+     def __call__(self, semanticId, instanceId):
+         globalId = local2global(semanticId, instanceId)
+         return self.instanceDict[globalId]
+
+
+ # Meta class for KITTI360Bbox3D
+ class Annotation3D:
+     # Constructor
+     def __init__(self, labelDir='', sequence=''):
+
+         labelPath = glob.glob(os.path.join(labelDir, '*', '%s.xml' % sequence))  # train or test
+         if len(labelPath) != 1:
+             raise RuntimeError('%s does not exist! Please specify KITTI360_DATASET in your environment path.' % labelPath)
+         else:
+             labelPath = labelPath[0]
+             print('Loading %s...' % labelPath)
+
+         self.init_instance(labelPath)
+
+     def init_instance(self, labelPath):
+         # load annotation
+         tree = ET.parse(labelPath)
+         root = tree.getroot()
+
+         self.objects = defaultdict(dict)
+
+         self.num_bbox = 0
+
+         for child in root:
+             if child.find('transform') is None:
+                 continue
+             obj = KITTI360Bbox3D()
+             obj.parseBbox(child)
+             globalId = local2global(obj.semanticId, obj.instanceId)
+             self.objects[globalId][obj.timestamp] = obj
+             self.num_bbox += 1
+
+         globalIds = np.asarray(list(self.objects.keys()))
+         semanticIds, instanceIds = global2local(globalIds)
+         for label in labels:
+             if label.hasInstances:
+                 print(f'{label.name:<30}:\t {(semanticIds == label.id).sum()}')
+         print(f'Loaded {len(globalIds)} instances')
+         print(f'Loaded {self.num_bbox} boxes')
+
+     def __call__(self, semanticId, instanceId, timestamp=None):
+         globalId = local2global(semanticId, instanceId)
+         if globalId in self.objects.keys():
+             # static object
+             if len(self.objects[globalId].keys()) == 1:
+                 if -1 in self.objects[globalId].keys():
+                     return self.objects[globalId][-1]
+                 else:
+                     return None
+             # dynamic object
+             else:
+                 return self.objects[globalId][timestamp]
+         else:
+             return None
+
+
+ class Annotation3DPly:
+     # parse fused 3D point cloud
+     def __init__(self, labelDir='', sequence='', isLabeled=True, isDynamic=False, showStatic=True):
+
+         if isLabeled and not isDynamic:
+             # x y z r g b semanticId instanceId isVisible confidence
+             self.fmt = '=fffBBBiiBf'
+             self.fmt_len = 28
+         elif isLabeled and isDynamic:
+             # x y z r g b semanticId instanceId isVisible timestamp confidence
+             self.fmt = '=fffBBBiiBif'
+             self.fmt_len = 32
+         elif not isLabeled and not isDynamic:
+             # x y z r g b
+             self.fmt = '=fffBBBB'
+             self.fmt_len = 16
+         else:
+             raise RuntimeError('Invalid binary format!')
+
+         # True for training data, False for testing data
+         self.isLabeled = isLabeled
+         # True for dynamic data, False for static data
+         self.isDynamic = isDynamic
+         # True for inspecting static data, False for inspecting dynamic data
+         self.showStatic = showStatic
+
+         pcdFolder = 'static' if self.showStatic else 'dynamic'
+         trainTestDir = 'train' if self.isLabeled else 'test'
+         self.pcdFileList = sorted(glob.glob(os.path.join(labelDir, trainTestDir, sequence, pcdFolder, '*.ply')))
+
+         print('Found %d ply files in %s' % (len(self.pcdFileList), sequence))
+
+     def readBinaryPly(self, pcdFile, n_pts=None):
+
+         with open(pcdFile, 'rb') as f:
+             plyData = f.readlines()
+
+         headLine = plyData.index(b'end_header\n') + 1
+         plyData = plyData[headLine:]
+         plyData = b"".join(plyData)
+
+         n_pts_loaded = len(plyData) / self.fmt_len
+         # sanity check
+         if n_pts:
+             assert n_pts_loaded == n_pts
+         n_pts_loaded = int(n_pts_loaded)
+
+         data = []
+         for i in range(n_pts_loaded):
+             pts = struct.unpack(self.fmt, plyData[i * self.fmt_len:(i + 1) * self.fmt_len])
+             data.append(pts)
+         data = np.asarray(data)
+
+         return data
+
+     def writeBinaryPly(self, pcdFile, data):
+         fmt = '=fffBBBiiB'
+         fmt_len = 24
+         n_pts = data.shape[0]
+
+         with open(pcdFile, 'wb') as f:
+             f.write(b'ply\n')
+             f.write(b'format binary_little_endian 1.0\n')
+             f.write(b'comment author Yiyi Liao\n')
+             f.write(b'element vertex %d\n' % n_pts)
+             f.write(b'property float x\n')
+             f.write(b'property float y\n')
+             f.write(b'property float z\n')
+             f.write(b'property uchar red\n')
+             f.write(b'property uchar green\n')
+             f.write(b'property uchar blue\n')
+             f.write(b'property int semantic\n')
+
+
+ class Annotation3DInstance(object):
+     instance_id = 0
+     labelId = 0
+     vert_count = 0
+     med_dist = -1
+     dist_conf = 0.0
+
+     def __init__(self, mesh_vert_instances, instance_id):
+         if instance_id == -1:
+             return
+         self.instance_id = int(instance_id)
+         self.labelId = int(self.get_labelId(instance_id))
+         self.vert_count = int(self.get_instance_verts(mesh_vert_instances, instance_id))
+
+     def get_labelId(self, instance_id):
+         return int(instance_id // 1000)
+
+     def get_instance_verts(self, mesh_vert_instances, instance_id):
+         return (mesh_vert_instances == instance_id).sum()
+
+     def to_json(self):
+         return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
+
+     def to_dict(self):
+         dict = {}
+         dict["instance_id"] = self.instance_id
+         dict["labelId"] = self.labelId
+         dict["vert_count"] = self.vert_count
+         dict["med_dist"] = self.med_dist
+         dict["dist_conf"] = self.dist_conf
+         return dict
+
+     def from_json(self, data):
+         self.instance_id = int(data["instance_id"])
+         self.labelId = int(data["labelId"])
+         self.vert_count = int(data["vert_count"])
+         if "med_dist" in data:
+             self.med_dist = float(data["med_dist"])
+             self.dist_conf = float(data["dist_conf"])
+
+     def __str__(self):
+         return "(" + str(self.instance_id) + ")"
+
+
+ # a dummy example
+ if __name__ == "__main__":
+     ann = Annotation3D()
+
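The id helpers at the top of this file pack (semanticId, instanceId) pairs in base MAX_N = 1000; a short round-trip sketch with arbitrary ids:

    from datasets.kitti_360.annotation import local2global, global2local

    gid = local2global(26, 42)      # semantic class 26, instance 42
    assert gid == 26 * 1000 + 42
    assert global2local(gid) == (26, 42)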
datasets/kitti_360/compute_kitti_360_bbox_split.py ADDED
@@ -0,0 +1,110 @@
+ import argparse
+
+ from pathlib import Path
+
+ DRY_RUN = False
+
+ CUT = [7, 9, 10, 15, 19, 31, 43, 69, 87, 107, 118, 154, 156, 167, 168, 170, 171, 172, 173, 174, 178, 179, 180, 181,
+        182, 183, 184, 185, 187, 188, 193, 194, 195, 196, 201, 202, 203, 209, 210, 212, 213, 214, 215, 216, 217, 218,
+        219, 220, 221, 222, 224, 225, 226, 229, 230, 231, 234, 235, 236, 237, 238, 256, 257, 258, 267, 278, 283, 293,
+        294, 295, 296, 297, 298, 299, 310, 315, 317, 318, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333,
+        334, 335, 336, 337, 340, 341, 349, 353, 354, 361, 362, 365, 366, 368, 371, 372, 376, 380, 386, 387, 394, 402,
+        403, 404, 411, 414, 415, 416, 420, 438, 441, 448, 452, 456, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482,
+        484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 519, 520, 554, 562, 593, 594,
+        596]
+
+
+ def check_integrity(data_path, seq, img_id):
+     persp = data_path / "data_2d_raw" / seq / "image_00" / "data_rect" / f"{img_id:010d}.png"
+     fish = data_path / "data_2d_raw" / seq / "image_02" / "data_rgb" / f"{img_id:010d}.png"
+
+     return fish.exists() and persp.exists()
+
+
+ def main():
+     parser = argparse.ArgumentParser("KITTI-360 Bbox Split")
+     parser.add_argument("--data_path", "-d", type=str)
+     parser.add_argument("--out_path", "-o", type=str)
+     parser.add_argument("--offset", type=int, default=20)
+
+     args = parser.parse_args()
+
+     data_path = Path(args.data_path)
+     out_path = Path(args.out_path)
+     offset = args.offset
+
+     print("Setting up folders...")
+     Path(out_path).mkdir(parents=True, exist_ok=True)
+
+     segmentation_train_file = data_path / "data_2d_semantics" / "train" / "2013_05_28_drive_train_frames.txt"
+     segmentation_val_file = data_path / "data_2d_semantics" / "train" / "2013_05_28_drive_val_frames.txt"
+
+     with open(segmentation_train_file, "r") as f:
+         train_lines = f.readlines()
+     with open(segmentation_val_file, "r") as f:
+         val_lines = f.readlines()
+
+     train_files = []
+     val_files = []
+     test_files = []
+
+     invalid = 0
+
+     for i in range(len(train_lines)):
+         parts = train_lines[i].split(" ")
+         img_path = parts[0]
+
+         parts = img_path.split("/")
+         sequence = parts[1]
+         img_id = int(parts[-1][-14:-4])
+
+         if not check_integrity(data_path, sequence, img_id):
+             invalid += 1
+             continue
+
+         train_files.append(f"{sequence} {img_id:010d} l")
+         train_files.append(f"{sequence} {img_id:010d} r")
+
+     for i in range(len(val_lines)):
+         parts = val_lines[i].split(" ")
+         img_path = parts[0]
+         seg_path = parts[1][:-1]
+
+         parts = img_path.split("/")
+         sequence = parts[1]
+         img_id = int(parts[-1][-14:-4])
+
+         is_test = (i % offset) == 0
+
+         if not check_integrity(data_path, sequence, img_id):
+             invalid += 1
+             continue
+
+         if not is_test:
+             val_files.append(f"{sequence} {img_id:010d} l")
+         else:
+             test_files.append(f"{sequence} {img_id:010d} l")
+
+     print(f"Found {len(train_files)} train, {len(val_files)} val, and {len(test_files)} test files.")
+     print(f"Found {invalid} invalid frames.")
+
+     test_files = [s for i, s in enumerate(test_files) if i not in CUT]
+
+     print(f"{len(CUT)} test files removed. {len(test_files)} remaining.")
+
+     train_file = out_path / "train_files.txt"
+     val_file = out_path / "val_files.txt"
+     test_file = out_path / "test_files.txt"
+
+     with open(train_file, "w") as f:
+         f.writelines("\n".join(train_files))
+
+     with open(val_file, "w") as f:
+         f.writelines("\n".join(val_files))
+
+     with open(test_file, "w") as f:
+         f.writelines("\n".join(test_files))
+
+
+ if __name__ == "__main__":
+     main()
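Each emitted split line has the form `<sequence> <10-digit frame id> <l|r>`, matching what `Kitti360Dataset._load_split` parses; a short sketch with a made-up frame id:

    line = "2013_05_28_drive_0000_sync 0000000042 l"
    sequence, frame, side = line.split(" ")
    assert int(frame) == 42 and side in ("l", "r")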
datasets/kitti_360/kitti_360_dataset.py ADDED
@@ -0,0 +1,1263 @@
+ import math
+ import os
+ import time
+ import xml.etree.ElementTree as ET
+ from collections import Counter, defaultdict
+ from pathlib import Path
+ from typing import Optional
+
+ import cv2
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ import yaml
+ from scipy.spatial.transform import Rotation
+ from torch.utils.data import Dataset
+ from torchvision.transforms import ColorJitter
+
+ from datasets.kitti_360.annotation import KITTI360Bbox3D
+ from scenedino.common.augmentation import get_color_aug_fn
+
+ import omegaconf
+
+
+ class FisheyeToPinholeSampler:
+     def __init__(self, K_target, target_image_size, calibs, rotation=None):
+         self._compute_transform(K_target, target_image_size, calibs, rotation)
+
+     def _compute_transform(self, K_target, target_image_size, calibs, rotation=None):
+         x = (
+             torch.linspace(-1, 1, target_image_size[1])
+             .view(1, -1)
+             .expand(target_image_size)
+         )
+         y = (
+             torch.linspace(-1, 1, target_image_size[0])
+             .view(-1, 1)
+             .expand(target_image_size)
+         )
+         z = torch.ones_like(x)
+         xyz = torch.stack((x, y, z), dim=-1).view(-1, 3)
+
+         # Unproject
+         xyz = (torch.inverse(torch.tensor(K_target)) @ xyz.T).T
+
+         if rotation is not None:
+             xyz = (torch.tensor(rotation) @ xyz.T).T
+
+         # Backproject into fisheye
+         xyz = xyz / torch.norm(xyz, dim=-1, keepdim=True)
+         x = xyz[:, 0]
+         y = xyz[:, 1]
+         z = xyz[:, 2]
+
+         xi_src = calibs["mirror_parameters"]["xi"]
+         x = x / (z + xi_src)
+         y = y / (z + xi_src)
+
+         k1 = calibs["distortion_parameters"]["k1"]
+         k2 = calibs["distortion_parameters"]["k2"]
+
+         r = x * x + y * y
+         factor = 1 + k1 * r + k2 * r * r
+         x = x * factor
+         y = y * factor
+
+         gamma0 = calibs["projection_parameters"]["gamma1"]
+         gamma1 = calibs["projection_parameters"]["gamma2"]
+         u0 = calibs["projection_parameters"]["u0"]
+         v0 = calibs["projection_parameters"]["v0"]
+
+         x = x * gamma0 + u0
+         y = y * gamma1 + v0
+
+         xy = torch.stack((x, y), dim=-1).view(1, *target_image_size, 2)
+         self.sample_pts = xy
+
+     def resample(self, img):
+         img = img.unsqueeze(0)
+         resampled_img = F.grid_sample(img, self.sample_pts, align_corners=True).squeeze(0)
+         return resampled_img
+
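A shape-only usage sketch of the sampler above; every calibration number here is a placeholder, not a real KITTI-360 value:

    import torch

    from datasets.kitti_360.kitti_360_dataset import FisheyeToPinholeSampler

    calibs = {
        "mirror_parameters": {"xi": 2.2},
        "distortion_parameters": {"k1": 0.01, "k2": -0.002},
        "projection_parameters": {"gamma1": 1300.0, "gamma2": 1300.0,
                                  "u0": 700.0, "v0": 700.0},
    }
    K_target = [[0.8, 0.0, 0.0], [0.0, 1.6, 0.0], [0.0, 0.0, 1.0]]
    sampler = FisheyeToPinholeSampler(K_target, (192, 640), calibs)
    pinhole = sampler.resample(torch.rand(3, 1400, 1400))  # -> (3, 192, 640)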
83
+
84
+ # TODO: probably move to KITTI-360 dataset
85
+ # The KITTI 360 cameras have a 5 degrees negative inclination. We need to account for that.
86
+ cam_incl_adjust = torch.tensor(
87
+ [
88
+ [1.0000000, 0.0000000, 0.0000000, 0],
89
+ [0.0000000, 0.9961947, 0.0871557, 0],
90
+ [0.0000000, -0.0871557, 0.9961947, 0],
91
+ [0.0000000, 000000000, 0.0000000, 1],
92
+ ],
93
+ dtype=torch.float32,
94
+ ).view(1, 1, 4, 4)
95
+
96
+
97
+ def get_pts(x_range, y_range, z_range, ppm, ppm_y, y_res=None): ## ppm:=pts_per_meter
98
+ x_res = abs(int((x_range[1] - x_range[0]) * ppm))
99
+ if y_res is None:
100
+ y_res = abs(int((y_range[1] - y_range[0]) * ppm_y))
101
+ z_res = abs(int((z_range[1] - z_range[0]) * ppm))
102
+ x = (
103
+ torch.linspace(x_range[0], x_range[1], x_res)
104
+ .view(1, 1, x_res)
105
+ .expand(y_res, z_res, -1)
106
+ )
107
+ z = (
108
+ torch.linspace(z_range[0], z_range[1], z_res)
109
+ .view(1, z_res, 1)
110
+ .expand(y_res, -1, x_res)
111
+ )
112
+ if y_res == 1:
113
+ y = (
114
+ torch.tensor([y_range[0] * 0.5 + y_range[1] * 0.5])
115
+ .view(y_res, 1, 1)
116
+ .expand(-1, z_res, x_res)
117
+ )
118
+ else:
119
+ y = (
120
+ torch.linspace(y_range[0], y_range[1], y_res)
121
+ .view(y_res, 1, 1)
122
+ .expand(-1, z_res, x_res)
123
+ )
124
+ xyz = torch.stack((x, y, z), dim=-1)
125
+
126
+ return xyz, (x_res, y_res, z_res)
127
+
128
+
129
+ # This function takes all points between min_y and max_y and projects them into the x-z plane.
130
+ # To avoid cases where there are no points at the top end, we consider also points that are beyond the maximum z distance.
131
+ # The points are then converted to polar coordinates and sorted by angle.
132
+
133
+
134
+ def get_lidar_slices(point_clouds, velo_poses, y_range, y_res, max_dist):
135
+ slices = []
136
+ ys = torch.linspace(y_range[0], y_range[1], y_res)
137
+ if y_res > 1:
138
+ slice_height = ys[1] - ys[0]
139
+ else:
140
+ slice_height = 0
141
+ n_bins = 360
142
+
143
+ for y in ys:
144
+ if y_res == 1:
145
+ min_y = y
146
+ max_y = y_range[-1]
147
+ else:
148
+ min_y = y - slice_height / 2
149
+ max_y = y + slice_height / 2
150
+
151
+ slice = []
152
+
153
+ for pc, velo_pose in zip(point_clouds, velo_poses):
154
+ pc_world = (velo_pose @ pc.T).T
155
+
156
+ mask = ((pc_world[:, 1] >= min_y) & (pc_world[:, 1] <= max_y)) | (
157
+ torch.norm(pc_world[:, :3], dim=-1) >= max_dist
158
+ )
159
+
160
+ slice_points = pc[mask, :2]
161
+
162
+ angles = torch.atan2(slice_points[:, 1], slice_points[:, 0])
163
+ dists = torch.norm(slice_points, dim=-1)
164
+
165
+ slice_points_polar = torch.stack((angles, dists), dim=1)
166
+ # Sort by angles for fast lookup
167
+ slice_points_polar = slice_points_polar[torch.sort(angles)[1], :]
168
+
169
+ slice_points_polar_binned = torch.zeros_like(slice_points_polar[:n_bins, :])
170
+ bin_borders = torch.linspace(
171
+ -math.pi, math.pi, n_bins + 1, device=slice_points_polar.device
172
+ )
173
+
174
+ dist = slice_points_polar[0, 1]
175
+
176
+ # To reduce noise, we bin the lidar points into bins of 1deg and then take the minimum distance per bin.
177
+ border_is = torch.searchsorted(slice_points_polar[:, 0], bin_borders)
178
+
179
+ for i in range(n_bins):
180
+ left_i, right_i = border_is[i], border_is[i + 1]
181
+ angle = (bin_borders[i] + bin_borders[i + 1]) * 0.5
182
+ if right_i > left_i:
183
+ dist = torch.min(slice_points_polar[left_i:right_i, 1])
184
+ slice_points_polar_binned[i, 0] = angle
185
+ slice_points_polar_binned[i, 1] = dist
186
+
187
+ slice_points_polar = slice_points_polar_binned
188
+
189
+ # Append first element to last to have full 360deg coverage
190
+ slice_points_polar = torch.cat(
191
+ (
192
+ torch.tensor(
193
+ [
194
+ [
195
+ slice_points_polar[-1, 0] - math.pi * 2,
196
+ slice_points_polar[-1, 1],
197
+ ]
198
+ ],
199
+ device=slice_points_polar.device,
200
+ ),
201
+ slice_points_polar,
202
+ torch.tensor(
203
+ [
204
+ [
205
+ slice_points_polar[0, 0] + math.pi * 2,
206
+ slice_points_polar[0, 1],
207
+ ]
208
+ ],
209
+ device=slice_points_polar.device,
210
+ ),
211
+ ),
212
+ dim=0,
213
+ )
214
+
215
+ slice.append(slice_points_polar)
216
+
217
+ slices.append(slice)
218
+
219
+ return slices
220
+
221
+
222
+ def check_occupancy(pts, slices, velo_poses, min_dist=3):
223
+ is_occupied = torch.ones_like(pts[:, 0])
224
+ is_visible = torch.zeros_like(pts[:, 0], dtype=torch.bool)
225
+
226
+ thresh = (len(slices[0]) - 2) / len(slices[0])
227
+
228
+ pts = torch.cat((pts, torch.ones_like(pts[:, :1])), dim=-1)
229
+
230
+ world_to_velos = torch.inverse(velo_poses)
231
+
232
+ step = pts.shape[0] // len(slices)
233
+
234
+ for i, slice in enumerate(slices):
235
+ for j, (lidar_polar, world_to_velo) in enumerate(zip(slice, world_to_velos)):
236
+ pts_velo = (world_to_velo @ pts[i * step : (i + 1) * step, :].T).T
237
+
238
+ # Convert query points to polar coordinates in velo space
239
+ angles = torch.atan2(pts_velo[:, 1], pts_velo[:, 0])
240
+ dists = torch.norm(pts_velo, dim=-1)
241
+
242
+ indices = torch.searchsorted(lidar_polar[:, 0].contiguous(), angles)
243
+
244
+ left_angles = lidar_polar[indices - 1, 0]
245
+ right_angles = lidar_polar[indices, 0]
246
+
247
+ left_dists = lidar_polar[indices - 1, 1]
248
+ right_dists = lidar_polar[indices, 1]
249
+
250
+ interp = (angles - left_angles) / (right_angles - left_angles)
251
+ surface_dist = left_dists * (1 - interp) + right_dists * interp
252
+
253
+ is_occupied_velo = (dists > surface_dist) | (dists < min_dist)
254
+
255
+ is_occupied[i * step : (i + 1) * step] += is_occupied_velo.float()
256
+
257
+ if j == 0:
258
+ is_visible[i * step : (i + 1) * step] |= ~is_occupied_velo
259
+
260
+ is_occupied /= len(slices[0])
261
+
262
+ is_occupied = is_occupied > thresh
263
+
264
+ return is_occupied, is_visible
265
+
266
+
267
+ class KITTIVelodyn:
268
+ def __init__(self, config) -> None:
269
+ self.config = config
270
+ self.occ_pts, self.yd = self._gen_pts()
271
+
272
+ def _gen_pts(self) -> torch.Tensor:
273
+ q_pts, (xd, yd, zd) = get_pts(
274
+ self.x_range, self.y_range, self.z_range, self.ppm, self.ppm_y, self.y_res
275
+ )
276
+ return q_pts, yd
277
+
278
+ def check_occupancy(self, points_all, velo_poses):
279
+ slices = get_lidar_slices(
280
+ points_all,
281
+ velo_poses,
282
+ self.config["y_range"],
283
+ self.yd,
284
+ (self.self.config["z_range"][0] ** 2 + self.self.config["x_range"][0] ** 2)
285
+ ** 0.5,
286
+ )
287
+ is_occupied, is_visible = check_occupancy(self.occ_pts, slices, velo_poses)
288
+
289
+ return is_occupied, is_visible
290
+
291
+
292
+ class Kitti360Dataset(Dataset):
293
+ def __init__(
294
+ self,
295
+ data_path: str,
296
+ pose_path: str,
297
+ split_path: Optional[str],
298
+ target_image_size=(192, 640),
299
+ return_stereo=False,
300
+ return_depth=False,
301
+ return_fisheye=True, ## default: True
302
+ return_3d_bboxes=False,
303
+ return_segmentation=False,
304
+ frame_count=2,
305
+ keyframe_offset=0,
306
+ dilation=1,
307
+ fisheye_rotation=0,
308
+ fisheye_offset=0,
309
+ stereo_offset=0,
310
+ eigen_depth=True,
311
+ color_aug=False,
312
+ is_preprocessed=False,
313
+ kitti_velodyn: KITTIVelodyn | None = None,
314
+ ):
315
+ self.data_path = data_path
316
+ self.pose_path = pose_path
317
+ self.split_path = split_path
318
+ self.target_image_size = target_image_size
319
+ self.return_stereo = return_stereo
320
+ self.return_fisheye = return_fisheye
321
+ self.return_depth = return_depth
322
+ self.return_3d_bboxes = return_3d_bboxes
323
+ self.return_segmentation = return_segmentation
324
+ self.frame_count = frame_count
325
+ self.dilation = dilation
326
+ self.fisheye_rotation = fisheye_rotation
327
+ self.fisheye_offset = fisheye_offset
328
+ self.stereo_offset = stereo_offset
329
+ self.keyframe_offset = keyframe_offset
330
+ self.eigen_depth = eigen_depth
331
+ self.color_aug = color_aug
332
+ self.is_preprocessed = is_preprocessed
333
+ self.kitti_velodyn = kitti_velodyn
334
+
335
+ if isinstance(self.fisheye_rotation, float) or isinstance(
336
+ self.fisheye_rotation, int
337
+ ):
338
+ self.fisheye_rotation = (0, self.fisheye_rotation)
339
+ self.fisheye_rotation = tuple(self.fisheye_rotation)
340
+
341
+ # if additional_random_front_offset and not self.random_fisheye_offset:
342
+ # raise ValueError("Random Fisheye Offset needs to be active for additional random front offset!")
343
+ # else:
344
+ # self.additional_random_front_offset = additional_random_front_offset
345
+
346
+ # Support random fisheye offset
347
+ if type(self.fisheye_offset) == int:
348
+ self.random_fisheye_offset = False
349
+ self.fisheye_offset = (self.fisheye_offset,)
350
+ elif type(self.fisheye_offset) in [
351
+ tuple,
352
+ list,
353
+ omegaconf.listconfig.ListConfig,
354
+ ]:
355
+ self.random_fisheye_offset = True
356
+ self.fisheye_offset = tuple(sorted(self.fisheye_offset))
357
+ else:
358
+ raise ValueError(
359
+ f"Invalid datatype for fisheye offset: {type(self.fisheye_offset)}"
360
+ )
361
+
362
+ if type(self.stereo_offset) == int:
363
+ self.random_stereo_offset = False
364
+ self.stereo_offset = (self.stereo_offset,)
365
+ elif type(self.stereo_offset) in [tuple, list, omegaconf.listconfig.ListConfig]:
366
+ self.random_stereo_offset = True
367
+ self.stereo_offset = tuple(sorted(self.stereo_offset))
368
+ else:
369
+ raise ValueError(
370
+ f"Invalid datatype for fisheye offset: {type(self.stereo_offset)}"
371
+ )
372
+
373
+ self._sequences = self._get_sequences(self.data_path)
374
+
375
+ self._calibs = self._load_calibs(self.data_path, self.fisheye_rotation)
376
+ self._resampler_02, self._resampler_03 = self._get_resamplers(
377
+ self._calibs, self._calibs["K_fisheye"], self.target_image_size
378
+ )
379
+ self._img_ids, self._poses = self._load_poses(self.pose_path, self._sequences)
380
+ self._left_offset = (
381
+ (self.frame_count - 1) // 2 + self.keyframe_offset
382
+ ) * self.dilation
383
+
384
+ self._perspective_folder = (
385
+ "data_rect"
386
+ if not self.is_preprocessed
387
+ else f"data_{self.target_image_size[0]}x{self.target_image_size[1]}"
388
+ )
389
+ self._fisheye_folder = (
390
+ "data_rgb"
391
+ if not self.is_preprocessed
392
+ else f"data_{self.target_image_size[0]}x{self.target_image_size[1]}_{self.fisheye_rotation[0]}x{self.fisheye_rotation[1]}"
393
+ )
394
+
395
+ if self.split_path is not None:
396
+ self._datapoints = self._load_split(self.split_path, self._img_ids)
397
+ elif self.return_segmentation:
398
+ self._datapoints = self._semantics_split(
399
+ self._sequences, self.data_path, self._img_ids
400
+ )
401
+ else:
402
+ self._datapoints = self._full_split(
403
+ self._sequences, self._img_ids, self.check_file_integrity
404
+ )
405
+
406
+ if self.return_3d_bboxes:
407
+ self._3d_bboxes = self._load_3d_bboxes(
408
+ Path(data_path) / "data_3d_bboxes" / "train_full", self._sequences
409
+ )
410
+
411
+ if self.return_segmentation:
412
+ # Segmentations are only provided for the left camera
413
+ self._datapoints = [dp for dp in self._datapoints if not dp[2]]
414
+
415
+ self._skip = 0
416
+ self.length = len(self._datapoints)
417
+
418
+ def check_file_integrity(self, seq, id):
419
+ dp = Path(self.data_path)
420
+ image_00 = dp / "data_2d_raw" / seq / "image_00" / self._perspective_folder
421
+ image_01 = dp / "data_2d_raw" / seq / "image_01" / self._perspective_folder
422
+ image_02 = dp / "data_2d_raw" / seq / "image_02" / self._fisheye_folder
423
+ image_03 = dp / "data_2d_raw" / seq / "image_03" / self._fisheye_folder
424
+
425
+ seq_len = self._img_ids[seq].shape[0]
426
+
427
+ ids = [id] + [
428
+ max(min(i, seq_len - 1), 0)
429
+ for i in range(
430
+ id - self._left_offset,
431
+ id - self._left_offset + self.frame_count * self.dilation,
432
+ self.dilation,
433
+ )
434
+ if i != id
435
+ ]
436
+ ids_fish = [max(min(id + self.fisheye_offset, seq_len - 1), 0)] + [
437
+ max(min(i, seq_len - 1), 0)
438
+ for i in range(
439
+ id + self.fisheye_offset - self._left_offset,
440
+ id
441
+ + self.fisheye_offset
442
+ - self._left_offset
443
+ + self.frame_count * self.dilation,
444
+ self.dilation,
445
+ )
446
+ if i != id + self.fisheye_offset
447
+ ]
448
+
449
+ img_ids = [self.get_img_id_from_id(seq, id) for id in ids]
450
+ img_ids_fish = [self.get_img_id_from_id(seq, id) for id in ids_fish]
451
+
452
+ for img_id in img_ids:
453
+ if not (
454
+ (image_00 / f"{img_id:010d}.png").exists()
455
+ and (image_01 / f"{img_id:010d}.png").exists()
456
+ ):
457
+ return False
458
+ if self.return_fisheye:
459
+ for img_id in img_ids_fish:
460
+ if not (
461
+ (image_02 / f"{img_id:010d}.png").exists()
462
+ and (image_03 / f"{img_id:010d}.png").exists()
463
+ ):
464
+ return False
465
+ return True
466
+
467
+ @staticmethod
468
+ def _get_sequences(data_path):
469
+ all_sequences = []
470
+
471
+ seqs_path = Path(data_path) / "data_2d_raw"
472
+ for seq in seqs_path.iterdir():
473
+ if not seq.is_dir():
474
+ continue
475
+ all_sequences.append(seq.name)
476
+
477
+ return all_sequences
478
+
479
+ @staticmethod
480
+ def _full_split(sequences, img_ids, check_integrity):
481
+ datapoints = []
482
+ for seq in sorted(sequences):
483
+ ids = [id for id in range(len(img_ids[seq])) if check_integrity(seq, id)]
484
+ datapoints_seq = [(seq, id, False) for id in ids] + [
485
+ (seq, id, True) for id in ids
486
+ ]
487
+ datapoints.extend(datapoints_seq)
488
+ return datapoints
489
+
490
+ @staticmethod
491
+ def _semantics_split(sequences, data_path, img_ids):
492
+ datapoints = []
493
+ for seq in sorted(sequences):
494
+ datapoints_seq = [(seq, id, False) for id in range(len(img_ids[seq]))]
495
+ datapoints_seq = [
496
+ dp
497
+ for dp in datapoints_seq
498
+ if os.path.exists(
499
+ os.path.join(
500
+ data_path,
501
+ "data_2d_semantics",
502
+ "train",
503
+ seq,
504
+ "image_00",
505
+ "semantic_rgb",
506
+ f"{img_ids[seq][dp[1]]:010d}.png",
507
+ )
508
+ )
509
+ ]
510
+ datapoints.extend(datapoints_seq)
511
+ return datapoints
512
+
513
+ @staticmethod
514
+ def _load_split(split_path, img_ids):
515
+ img_id2id = {
516
+ seq: {id: i for i, id in enumerate(ids)} for seq, ids in img_ids.items()
517
+ }
518
+
519
+ with open(split_path, "r") as f:
520
+ lines = f.readlines()
521
+
522
+ def split_line(l):
523
+ segments = l.split(" ")
524
+ seq = segments[0]
525
+ id = img_id2id[seq][int(segments[1])]
526
+ return seq, id, segments[2][0] == "r"
527
+
528
+ return list(map(split_line, lines))
529
+
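The split files referenced here are plain text, one datapoint per line; a sketch of the parsing above, using an entry from the seg split shipped with this commit:

    # "<sequence> <image id> <l|r>"
    # "2013_05_28_drive_0000_sync 0000000386 l"
    # -> ("2013_05_28_drive_0000_sync", <index of image 386 within the sequence>, False)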
530
+ @staticmethod
531
+ def _load_calibs(data_path, fisheye_rotation=0):
532
+ data_path = Path(data_path)
533
+
534
+ calib_folder = data_path / "calibration"
535
+ cam_to_pose_file = calib_folder / "calib_cam_to_pose.txt"
536
+ cam_to_velo_file = calib_folder / "calib_cam_to_velo.txt"
537
+ intrinsics_file = calib_folder / "perspective.txt"
538
+ fisheye_02_file = calib_folder / "image_02.yaml"
539
+ fisheye_03_file = calib_folder / "image_03.yaml"
540
+
541
+ cam_to_pose_data = {}
542
+ with open(cam_to_pose_file, "r") as f:
543
+ for line in f.readlines():
544
+ key, value = line.split(":", 1)
545
+ try:
546
+ cam_to_pose_data[key] = np.array(
547
+ [float(x) for x in value.split()], dtype=np.float32
548
+ )
549
+ except ValueError:
550
+ pass
551
+
552
+ cam_to_velo_data = None
553
+ with open(cam_to_velo_file, "r") as f:
554
+ line = f.readline()
555
+ try:
556
+ cam_to_velo_data = np.array(
557
+ [float(x) for x in line.split()], dtype=np.float32
558
+ )
559
+ except ValueError:
560
+ pass
561
+
562
+ intrinsics_data = {}
563
+ with open(intrinsics_file, "r") as f:
564
+ for line in f.readlines():
565
+ key, value = line.split(":", 1)
566
+ try:
567
+ intrinsics_data[key] = np.array(
568
+ [float(x) for x in value.split()], dtype=np.float32
569
+ )
570
+ except ValueError:
571
+ pass
572
+
573
+ with open(fisheye_02_file, "r") as f:
574
+ f.readline() # Skips first line that defines the YAML version
575
+ fisheye_02_data = yaml.safe_load(f)
576
+
577
+ with open(fisheye_03_file, "r") as f:
578
+ f.readline() # Skips first line that defines the YAML version
579
+ fisheye_03_data = yaml.safe_load(f)
580
+
581
+ im_size_rect = (
582
+ int(intrinsics_data["S_rect_00"][1]),
583
+ int(intrinsics_data["S_rect_00"][0]),
584
+ )
585
+ im_size_fish = (fisheye_02_data["image_height"], fisheye_02_data["image_width"])
586
+
587
+ # Projection matrices
588
+ # We use these projection matrices also when resampling the fisheye cameras.
589
+ # This makes downstream processing easier, but it could be done differently.
590
+ P_rect_00 = np.reshape(intrinsics_data["P_rect_00"], (3, 4))
591
+ P_rect_01 = np.reshape(intrinsics_data["P_rect_01"], (3, 4))
592
+
593
+ # Rotation matrices from raw to rectified -> Needs to be inverted later
594
+ R_rect_00 = np.eye(4, dtype=np.float32)
595
+ R_rect_01 = np.eye(4, dtype=np.float32)
596
+ R_rect_00[:3, :3] = np.reshape(intrinsics_data["R_rect_00"], (3, 3))
597
+ R_rect_01[:3, :3] = np.reshape(intrinsics_data["R_rect_01"], (3, 3))
598
+
599
+ # Rotation matrices from resampled fisheye to raw fisheye
600
+ fisheye_rotation = np.array(fisheye_rotation).reshape((1, 2))
601
+ R_02 = np.eye(4, dtype=np.float32)
602
+ R_03 = np.eye(4, dtype=np.float32)
603
+ R_02[:3, :3] = (
604
+ Rotation.from_euler("xy", fisheye_rotation[:, [1, 0]], degrees=True)
605
+ .as_matrix()
606
+ .astype(np.float32)
607
+ )
608
+ R_03[:3, :3] = (
609
+ Rotation.from_euler(
610
+ "xy", fisheye_rotation[:, [1, 0]] * np.array([[1, -1]]), degrees=True
611
+ )
612
+ .as_matrix()
613
+ .astype(np.float32)
614
+ )
615
+
616
+ # Load cam to pose transforms
617
+ T_00_to_pose = np.eye(4, dtype=np.float32)
618
+ T_01_to_pose = np.eye(4, dtype=np.float32)
619
+ T_02_to_pose = np.eye(4, dtype=np.float32)
620
+ T_03_to_pose = np.eye(4, dtype=np.float32)
621
+ T_00_to_velo = np.eye(4, dtype=np.float32)
622
+
623
+ T_00_to_pose[:3, :] = np.reshape(cam_to_pose_data["image_00"], (3, 4))
624
+ T_01_to_pose[:3, :] = np.reshape(cam_to_pose_data["image_01"], (3, 4))
625
+ T_02_to_pose[:3, :] = np.reshape(cam_to_pose_data["image_02"], (3, 4))
626
+ T_03_to_pose[:3, :] = np.reshape(cam_to_pose_data["image_03"], (3, 4))
627
+ T_00_to_velo[:3, :] = np.reshape(cam_to_velo_data, (3, 4))
628
+
629
+ # Compute cam to pose transforms for rectified perspective cameras
630
+ T_rect_00_to_pose = T_00_to_pose @ np.linalg.inv(R_rect_00)
631
+ T_rect_01_to_pose = T_01_to_pose @ np.linalg.inv(R_rect_01)
632
+
633
+ # Compute cam to pose transform for fisheye cameras
634
+ T_02_to_pose = T_02_to_pose @ R_02
635
+ T_03_to_pose = T_03_to_pose @ R_03
636
+
637
+ # Compute velo to cameras and velo to pose transforms
638
+ T_velo_to_rect_00 = R_rect_00 @ np.linalg.inv(T_00_to_velo)
639
+ T_velo_to_pose = T_rect_00_to_pose @ T_velo_to_rect_00
640
+ T_velo_to_rect_01 = np.linalg.inv(T_rect_01_to_pose) @ T_velo_to_pose
641
+
642
+ # Calibration matrix is the same for both perspective cameras
643
+ K = P_rect_00[:3, :3]
644
+
645
+ # Normalize calibration
646
+ f_x = K[0, 0] / im_size_rect[1]
647
+ f_y = K[1, 1] / im_size_rect[0]
648
+ c_x = K[0, 2] / im_size_rect[1]
649
+ c_y = K[1, 2] / im_size_rect[0]
650
+
651
+ # Change to image coordinates [-1, 1]
652
+ K[0, 0] = f_x * 2.0
653
+ K[1, 1] = f_y * 2.0
654
+ K[0, 2] = c_x * 2.0 - 1
655
+ K[1, 2] = c_y * 2.0 - 1
656
+
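A note on the convention established here (sketch, not part of the committed file): a pixel (u, v) in a W x H image is mapped to

    x = 2 * (u / W) - 1
    y = 2 * (v / H) - 1

so the normalized K projects camera-space points directly into [-1, 1]^2; load_depth further below inverts this with (x * 0.5 + 0.5) * W when splatting LiDAR points onto the pixel grid.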
657
+ # Convert fisheye calibration to [-1, 1] image dimensions
658
+ fisheye_02_data["projection_parameters"]["gamma1"] = (
659
+ fisheye_02_data["projection_parameters"]["gamma1"] / im_size_fish[1]
660
+ ) * 2.0
661
+ fisheye_02_data["projection_parameters"]["gamma2"] = (
662
+ fisheye_02_data["projection_parameters"]["gamma2"] / im_size_fish[0]
663
+ ) * 2.0
664
+ fisheye_02_data["projection_parameters"]["u0"] = (
665
+ fisheye_02_data["projection_parameters"]["u0"] / im_size_fish[1]
666
+ ) * 2.0 - 1.0
667
+ fisheye_02_data["projection_parameters"]["v0"] = (
668
+ fisheye_02_data["projection_parameters"]["v0"] / im_size_fish[0]
669
+ ) * 2.0 - 1.0
670
+
671
+ fisheye_03_data["projection_parameters"]["gamma1"] = (
672
+ fisheye_03_data["projection_parameters"]["gamma1"] / im_size_fish[1]
673
+ ) * 2.0
674
+ fisheye_03_data["projection_parameters"]["gamma2"] = (
675
+ fisheye_03_data["projection_parameters"]["gamma2"] / im_size_fish[0]
676
+ ) * 2.0
677
+ fisheye_03_data["projection_parameters"]["u0"] = (
678
+ fisheye_03_data["projection_parameters"]["u0"] / im_size_fish[1]
679
+ ) * 2.0 - 1.0
680
+ fisheye_03_data["projection_parameters"]["v0"] = (
681
+ fisheye_03_data["projection_parameters"]["v0"] / im_size_fish[0]
682
+ ) * 2.0 - 1.0
683
+
684
+ # Use same camera calibration as perspective cameras for resampling
685
+ # K_fisheye = np.eye(3, dtype=np.float32)
686
+ # K_fisheye[0, 0] = 2
687
+ # K_fisheye[1, 1] = 2
688
+
689
+ K_fisheye = K
690
+
691
+ calibs = {
692
+ "K_perspective": K,
693
+ "K_fisheye": K_fisheye,
694
+ "T_cam_to_pose": {
695
+ "00": T_rect_00_to_pose,
696
+ "01": T_rect_01_to_pose,
697
+ "02": T_02_to_pose,
698
+ "03": T_03_to_pose,
699
+ },
700
+ "T_velo_to_cam": {
701
+ "00": T_velo_to_rect_00,
702
+ "01": T_velo_to_rect_01,
703
+ },
704
+ "T_velo_to_pose": T_velo_to_pose,
705
+ "fisheye": {
706
+ "calib_02": fisheye_02_data,
707
+ "calib_03": fisheye_03_data,
708
+ "R_02": R_02[:3, :3],
709
+ "R_03": R_03[:3, :3],
710
+ },
711
+ "im_size": im_size_rect,
712
+ }
713
+
714
+ return calibs
715
+
716
+ @staticmethod
717
+ def _get_resamplers(calibs, K_target, target_image_size):
718
+ resampler_02 = FisheyeToPinholeSampler(
719
+ K_target,
720
+ target_image_size,
721
+ calibs["fisheye"]["calib_02"],
722
+ calibs["fisheye"]["R_02"],
723
+ )
724
+ resampler_03 = FisheyeToPinholeSampler(
725
+ K_target,
726
+ target_image_size,
727
+ calibs["fisheye"]["calib_03"],
728
+ calibs["fisheye"]["R_03"],
729
+ )
730
+
731
+ return resampler_02, resampler_03
732
+
733
+ @staticmethod
734
+ def _load_poses(pose_path, sequences):
735
+ ids = {}
736
+ poses = {}
737
+
738
+ for seq in sequences:
739
+ pose_file = Path(pose_path) / seq / f"poses.txt"
740
+
741
+ try:
742
+ pose_data = np.loadtxt(pose_file)
743
+ except FileNotFoundError:
744
+ print(f"Ground truth poses are not avaialble for sequence {seq}.")
745
+
746
+ ids_seq = pose_data[:, 0].astype(int)
747
+ poses_seq = pose_data[:, 1:].astype(np.float32).reshape((-1, 3, 4))
748
+ poses_seq = np.concatenate(
749
+ (poses_seq, np.zeros_like(poses_seq[:, :1, :])), axis=1
750
+ )
751
+ poses_seq[:, 3, 3] = 1
752
+
753
+ ids[seq] = ids_seq
754
+ poses[seq] = poses_seq
755
+ return ids, poses
756
+
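Each row of poses.txt is a frame id followed by a flattened 3x4 pose matrix, which the code above pads to 4x4. A sketch with placeholder values:

    # frame_id  r11 r12 r13 t1  r21 r22 r23 t2  r31 r32 r33 t3
    row = np.array([386., 1, 0, 0, 0.5, 0, 1, 0, 1.2, 0, 0, 1, -0.3])
    pose = np.concatenate([row[1:].reshape(3, 4), [[0, 0, 0, 1]]]).astype(np.float32)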
757
+ @staticmethod
758
+ def _load_3d_bboxes(bbox_path, sequences):
759
+ bboxes = {}
760
+
761
+ for seq in sequences:
762
+ with open(Path(bbox_path) / f"{seq}.xml", "rb") as f:
763
+ tree = ET.parse(f)
764
+ root = tree.getroot()
765
+
766
+ objects = defaultdict(list)
767
+
768
+ num_bbox = 0
769
+
770
+ for child in root:
771
+ if child.find("transform") is None:
772
+ continue
773
+ obj = KITTI360Bbox3D()
774
+ if child.find("semanticId") is not None:
775
+ obj.parseBbox(child)
776
+ else:
777
+ obj.parseStuff(child)
778
+ # globalId = local2global(obj.semanticId, obj.instanceId)
779
+ # objects[globalId][obj.timestamp] = obj
780
+ objects[obj.timestamp].append(obj)
781
+ num_bbox += 1
782
+
783
+ # globalIds = np.asarray(list(objects.keys()))
784
+ # semanticIds, instanceIds = global2local(globalIds)
785
+ # for label in labels:
786
+ # if label.hasInstances:
787
+ # print(f'{label.name:<30}:\t {(semanticIds==label.id).sum()}')
788
+ # print(f'Loaded {len(globalIds)} instances')
789
+ # print(f'Loaded {num_bbox} boxes')
790
+
791
+ bboxes[seq] = objects
792
+
793
+ return bboxes
794
+
795
+ def get_img_id_from_id(self, sequence, id):
796
+ return self._img_ids[sequence][id]
797
+
798
+ def load_images(self, seq, img_ids, load_left, load_right, img_ids_fish=None):
799
+ imgs_p_left = []
800
+ imgs_f_left = []
801
+ imgs_p_right = []
802
+ imgs_f_right = []
803
+
804
+ if img_ids_fish is None:
805
+ img_ids_fish = img_ids
806
+
807
+ for id in img_ids:
808
+ if load_left:
809
+ img_perspective = (
810
+ cv2.cvtColor(
811
+ cv2.imread(
812
+ os.path.join(
813
+ self.data_path,
814
+ "data_2d_raw",
815
+ seq,
816
+ "image_00",
817
+ self._perspective_folder,
818
+ f"{id:010d}.png",
819
+ )
820
+ ),
821
+ cv2.COLOR_BGR2RGB,
822
+ ).astype(np.float32)
823
+ / 255
824
+ )
825
+ imgs_p_left += [img_perspective]
826
+
827
+ if load_right:
828
+ img_perspective = (
829
+ cv2.cvtColor(
830
+ cv2.imread(
831
+ os.path.join(
832
+ self.data_path,
833
+ "data_2d_raw",
834
+ seq,
835
+ "image_01",
836
+ self._perspective_folder,
837
+ f"{id:010d}.png",
838
+ )
839
+ ),
840
+ cv2.COLOR_BGR2RGB,
841
+ ).astype(np.float32)
842
+ / 255
843
+ )
844
+ imgs_p_right += [img_perspective]
845
+
846
+ for id in img_ids_fish:
847
+ if load_left:
848
+ img_fisheye = (
849
+ cv2.cvtColor(
850
+ cv2.imread(
851
+ os.path.join(
852
+ self.data_path,
853
+ "data_2d_raw",
854
+ seq,
855
+ "image_02",
856
+ self._fisheye_folder,
857
+ f"{id:010d}.png",
858
+ )
859
+ ),
860
+ cv2.COLOR_BGR2RGB,
861
+ ).astype(np.float32)
862
+ / 255
863
+ )
864
+ imgs_f_left += [img_fisheye]
865
+ if load_right:
866
+ img_fisheye = (
867
+ cv2.cvtColor(
868
+ cv2.imread(
869
+ os.path.join(
870
+ self.data_path,
871
+ "data_2d_raw",
872
+ seq,
873
+ "image_03",
874
+ self._fisheye_folder,
875
+ f"{id:010d}.png",
876
+ )
877
+ ),
878
+ cv2.COLOR_BGR2RGB,
879
+ ).astype(np.float32)
880
+ / 255
881
+ )
882
+ imgs_f_right += [img_fisheye]
883
+
884
+ return imgs_p_left, imgs_f_left, imgs_p_right, imgs_f_right
885
+
886
+ def process_img(
887
+ self,
888
+ img: np.ndarray,
889
+ color_aug_fn=None,
890
+ resampler: FisheyeToPinholeSampler = None,
891
+ ):
892
+ if resampler is not None and not self.is_preprocessed:
893
+ img = torch.tensor(img).permute(2, 0, 1)
894
+ img = resampler.resample(img)
895
+ else:
896
+ if self.target_image_size:
897
+ img = cv2.resize(
898
+ img,
899
+ (self.target_image_size[1], self.target_image_size[0]),
900
+ interpolation=cv2.INTER_LINEAR,
901
+ )
902
+ img = np.transpose(img, (2, 0, 1))
903
+ img = torch.tensor(img)
904
+
905
+ if color_aug_fn is not None:
906
+ img = color_aug_fn(img)
907
+
908
+ img = img * 2 - 1
909
+ return img
910
+
911
+ def load_occ(self, seq, id, poses):
911
+ world_transform = torch.inverse(torch.tensor(np.stack(poses[:1], axis=0)))
913
+ world_transform = cam_incl_adjust @ world_transform
914
+ seq_len = self._img_ids[seq].shape[0]
915
+ # Load lidar pointclouds
916
+ points_all, velo_poses = [], []
917
+ for id in range(id, min(id + self.aggregate_timesteps, seq_len)):
918
+ points = np.fromfile(
919
+ os.path.join(
920
+ self.data_path,
921
+ "data_3d_raw",
922
+ seq,
923
+ "velodyne_points",
924
+ "data",
925
+ f"{self._img_ids[seq][id]:010d}.bin",
926
+ ),
927
+ dtype=np.float32,
928
+ ).reshape(-1, 4)
929
+ points[:, 3] = 1.0
930
+ points = torch.tensor(points)
931
+ velo_pose = (
932
+ world_transform.squeeze()
933
+ @ torch.tensor(self._poses[seq][id])
934
+ @ torch.tensor(self._calibs["T_velo_to_pose"])
935
+ )
936
+ points_all.append(points)
937
+ velo_poses.append(velo_pose)
938
+
939
+ velo_poses = torch.stack(velo_poses, dim=0)
940
+
941
+ return self.kitti_velodyn.check_occupancy(points_all, velo_poses)
942
+
943
+ def get_3d_bboxes(self, seq, img_id, pose, projs):
944
+ seq_3d_bboxes = self._3d_bboxes[seq]
945
+ pose_w2c = np.linalg.inv(pose)
946
+
947
+ def filter_bbox(bbox):
948
+ verts = bbox.vertices
949
+ verts = (projs @ (pose_w2c[:3, :3] @ verts.T + pose_w2c[:3, 3, None])).T
950
+ verts[:, :2] /= verts[:, 2:3]
951
+ valid = (
952
+ ((verts[:, 0] >= -1) & (verts[:, 0] <= 1))
953
+ & ((verts[:, 1] >= -1) & (verts[:, 1] <= 1))
954
+ & ((verts[:, 2] > 0) & (verts[:, 2] <= 80))
955
+ )
956
+ valid = np.any(valid, axis=-1)
957
+ return valid
958
+
959
+ bboxes = seq_3d_bboxes[-1] + seq_3d_bboxes[img_id]
960
+
961
+ bboxes = list(filter(filter_bbox, bboxes))
962
+
963
+ bboxes = [
964
+ {
965
+ "vertices": bbox.vertices,
966
+ "faces": bbox.faces,
967
+ "semanticId": bbox.semanticId,
968
+ "instanceId": bbox.instanceId,
969
+ }
970
+ for bbox in bboxes
971
+ ]
972
+
973
+ return bboxes
974
+
975
+ def load_segmentation(self, seq, img_id):
976
+ seg = cv2.imread(
977
+ os.path.join(
978
+ self.data_path,
979
+ "data_2d_semantics",
980
+ "train",
981
+ seq,
982
+ "image_00",
983
+ "semantic",
984
+ f"{img_id:010d}.png",
985
+ ),
986
+ cv2.IMREAD_UNCHANGED,
987
+ )
988
+ seg = cv2.resize(
989
+ seg,
990
+ (self.target_image_size[1], self.target_image_size[0]),
991
+ interpolation=cv2.INTER_NEAREST,
992
+ )
993
+ return seg
994
+
995
+ def load_depth(self, seq, img_id, is_right):
996
+ points = np.fromfile(
997
+ os.path.join(
998
+ self.data_path,
999
+ "data_3d_raw",
1000
+ seq,
1001
+ "velodyne_points",
1002
+ "data",
1003
+ f"{img_id:010d}.bin",
1004
+ ),
1005
+ dtype=np.float32,
1006
+ ).reshape(-1, 4)
1007
+ points[:, 3] = 1.0
1008
+
1009
+ T_velo_to_cam = self._calibs["T_velo_to_cam"]["00" if not is_right else "01"]
1010
+ K = self._calibs["K_perspective"]
1011
+
1012
+ # project the points to the camera
1013
+ velo_pts_im = np.dot(K @ T_velo_to_cam[:3, :], points.T).T
1014
+ velo_pts_im[:, :2] = velo_pts_im[:, :2] / velo_pts_im[:, 2][..., None]
1015
+
1016
+ # the projection is normalized to [-1, 1] -> transform to [0, height-1] x [0, width-1]
1017
+ velo_pts_im[:, 0] = np.round(
1018
+ (velo_pts_im[:, 0] * 0.5 + 0.5) * self.target_image_size[1]
1019
+ )
1020
+ velo_pts_im[:, 1] = np.round(
1021
+ (velo_pts_im[:, 1] * 0.5 + 0.5) * self.target_image_size[0]
1022
+ )
1023
+
1024
+ # check if in bounds
1025
+ val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0)
1026
+ val_inds = (
1027
+ val_inds
1028
+ & (velo_pts_im[:, 0] < self.target_image_size[1])
1029
+ & (velo_pts_im[:, 1] < self.target_image_size[0])
1030
+ )
1031
+ velo_pts_im = velo_pts_im[val_inds, :]
1032
+
1033
+ # project to image
1034
+ depth = np.zeros(self.target_image_size)
1035
+ depth[
1036
+ velo_pts_im[:, 1].astype(np.int32), velo_pts_im[:, 0].astype(np.int32)
1037
+ ] = velo_pts_im[:, 2]
1038
+
1039
+ # find the duplicate points and choose the closest depth
1040
+ inds = (
1041
+ velo_pts_im[:, 1] * (self.target_image_size[1] - 1) + velo_pts_im[:, 0] - 1
1042
+ )
1043
+ dupe_inds = [item for item, count in Counter(inds).items() if count > 1]
1044
+ for dd in dupe_inds:
1045
+ pts = np.where(inds == dd)[0]
1046
+ x_loc = int(velo_pts_im[pts[0], 0])
1047
+ y_loc = int(velo_pts_im[pts[0], 1])
1048
+ depth[y_loc, x_loc] = velo_pts_im[pts, 2].min()
1049
+ depth[depth < 0] = 0
1050
+
1051
+ return depth[None, :, :]
1052
+
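The projection pipeline above, condensed to one homogeneous LiDAR point (sketch, not part of the committed file; W, H denote the target image size):

    q = K @ T_velo_to_cam[:3, :] @ p          # p = (x, y, z, 1); q[2] is the metric depth
    u = round((q[0] / q[2] * 0.5 + 0.5) * W)  # [-1, 1] -> pixel coordinates
    v = round((q[1] / q[2] * 0.5 + 0.5) * H)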
1053
+ def __getitem__(self, index: int):
1054
+ _start_time = time.time()
1055
+
1056
+ if index >= self.length:
1057
+ raise IndexError()
1058
+
1059
+ if self._skip != 0:
1060
+ index += self._skip
1061
+
1062
+ sequence, id, is_right = self._datapoints[index]
1063
+ seq_len = self._img_ids[sequence].shape[0]
1064
+
1065
+ load_left, load_right = (
1066
+ not is_right
1067
+ ) or self.return_stereo, is_right or self.return_stereo
1068
+
1069
+ ## Randomly sample the fisheye time step so that it covers regions occluded in the stereo views
1070
+ if self.random_fisheye_offset:
1071
+ fisheye_offset = self.fisheye_offset[
1072
+ torch.randint(0, len(self.fisheye_offset), (1,)).item()
1073
+ ] ## randomly select one of the fisheye offsets given in the config
1074
+ else:
1075
+ fisheye_offset = self.fisheye_offset[-1]
1076
+
1077
+ if self.random_stereo_offset:
1078
+ stereo_offset = self.stereo_offset[
1079
+ torch.randint(0, len(self.stereo_offset), (1,)).item()
1080
+ ]
1081
+ else:
1082
+ stereo_offset = self.stereo_offset[0]
1083
+
1084
+ # ids = [id] + [max(min(i, seq_len-1), 0) for i in range(id - self._left_offset, id - self._left_offset + self.frame_count * self.dilation, self.dilation) if i != id]
1085
+ # ids_fish = [max(min(id + self.fisheye_offset, seq_len-1), 0)] + [max(min(i, seq_len-1), 0) for i in range(id + self.fisheye_offset - self._left_offset, id + self.fisheye_offset - self._left_offset + self.frame_count * self.dilation, self.dilation) if i != id + self.fisheye_offset]
1086
+ # img_ids = [self.get_img_id_from_id(sequence, id) for id in ids]
1087
+ # img_ids_fish = [self.get_img_id_from_id(sequence, id) for id in ids_fish]
1088
+
1089
+ id_st = (
1090
+ id + stereo_offset - 1
1091
+ ) ## TODO: handle offsets of 3+ steps without the -1; scenes are currently sampled stereo_offset frames apart
1092
+ ids = [id] + [
1093
+ max(min(i, seq_len - 1), 0)
1094
+ for i in range(
1095
+ id_st - self._left_offset,
1096
+ id_st - self._left_offset + self.frame_count * self.dilation,
1097
+ self.dilation,
1098
+ )
1099
+ if i != id_st
1100
+ ]
1101
+ ids_fish = [max(min(id + fisheye_offset, seq_len - 1), 0)] + [
1102
+ max(min(i, seq_len - 1), 0)
1103
+ for i in range(
1104
+ id + fisheye_offset - self._left_offset,
1105
+ id
1106
+ + fisheye_offset
1107
+ - self._left_offset
1108
+ + self.frame_count * self.dilation,
1109
+ self.dilation,
1110
+ )
1111
+ if i != id + fisheye_offset
1112
+ ]
1113
+ ## ids_fish is now shifted fisheye_offset steps ahead of ids
1114
+ img_ids = [self.get_img_id_from_id(sequence, id) for id in ids]
1115
+ img_ids_fish = [self.get_img_id_from_id(sequence, id) for id in ids_fish]
1116
+
1117
+ if not self.return_fisheye:
1118
+ ids_fish, img_ids_fish = [], []
1119
+
1120
+ if self.color_aug:
1121
+ color_aug_fn = get_color_aug_fn(
1122
+ ColorJitter.get_params(
1123
+ brightness=(0.8, 1.2),
1124
+ contrast=(0.8, 1.2),
1125
+ saturation=(0.8, 1.2),
1126
+ hue=(-0.1, 0.1),
1127
+ )
1128
+ )
1129
+ else:
1130
+ color_aug_fn = None
1131
+
1132
+ _start_time_loading = time.time()
1133
+ imgs_p_left, imgs_f_left, imgs_p_right, imgs_f_right = self.load_images(
1134
+ sequence, img_ids, load_left, load_right, img_ids_fish=img_ids_fish
1135
+ )
1136
+ _loading_time = np.array(time.time() - _start_time_loading)
1137
+
1138
+ _start_time_processing = time.time()
1139
+ imgs_p_left = [
1140
+ self.process_img(img, color_aug_fn=color_aug_fn) for img in imgs_p_left
1141
+ ]
1142
+ imgs_f_left = [
1143
+ self.process_img(
1144
+ img, color_aug_fn=color_aug_fn, resampler=self._resampler_02
1145
+ )
1146
+ for img in imgs_f_left
1147
+ ]
1148
+ imgs_p_right = [
1149
+ self.process_img(img, color_aug_fn=color_aug_fn) for img in imgs_p_right
1150
+ ]
1151
+ imgs_f_right = [
1152
+ self.process_img(
1153
+ img, color_aug_fn=color_aug_fn, resampler=self._resampler_03
1154
+ )
1155
+ for img in imgs_f_right
1156
+ ]
1157
+ _processing_time = np.array(time.time() - _start_time_processing)
1158
+
1159
+ # These poses are camera to world !!
1160
+ poses_p_left = (
1161
+ [
1162
+ self._poses[sequence][i, :, :] @ self._calibs["T_cam_to_pose"]["00"]
1163
+ for i in ids
1164
+ ]
1165
+ if load_left
1166
+ else []
1167
+ )
1168
+ poses_f_left = (
1169
+ [
1170
+ self._poses[sequence][i, :, :] @ self._calibs["T_cam_to_pose"]["02"]
1171
+ for i in ids_fish
1172
+ ]
1173
+ if load_left
1174
+ else []
1175
+ )
1176
+ poses_p_right = (
1177
+ [
1178
+ self._poses[sequence][i, :, :] @ self._calibs["T_cam_to_pose"]["01"]
1179
+ for i in ids
1180
+ ]
1181
+ if load_right
1182
+ else []
1183
+ )
1184
+ poses_f_right = (
1185
+ [
1186
+ self._poses[sequence][i, :, :] @ self._calibs["T_cam_to_pose"]["03"]
1187
+ for i in ids_fish
1188
+ ]
1189
+ if load_right
1190
+ else []
1191
+ )
1192
+
1193
+ projs_p_left = [self._calibs["K_perspective"] for _ in ids] if load_left else []
1194
+ projs_f_left = (
1195
+ [self._calibs["K_fisheye"] for _ in ids_fish] if load_left else []
1196
+ )
1197
+ projs_p_right = (
1198
+ [self._calibs["K_perspective"] for _ in ids] if load_right else []
1199
+ )
1200
+ projs_f_right = (
1201
+ [self._calibs["K_fisheye"] for _ in ids_fish] if load_right else []
1202
+ )
1203
+
1204
+ imgs = (
1205
+ imgs_p_left + imgs_p_right + imgs_f_left + imgs_f_right
1206
+ if not is_right
1207
+ else imgs_p_right + imgs_p_left + imgs_f_right + imgs_f_left
1208
+ )
1209
+ projs = (
1210
+ projs_p_left + projs_p_right + projs_f_left + projs_f_right
1211
+ if not is_right
1212
+ else projs_p_right + projs_p_left + projs_f_right + projs_f_left
1213
+ )
1214
+ poses = (
1215
+ poses_p_left + poses_p_right + poses_f_left + poses_f_right
1216
+ if not is_right
1217
+ else poses_p_right + poses_p_left + poses_f_right + poses_f_left
1218
+ )
1219
+ ids = np.array(ids + ids + ids_fish + ids_fish, dtype=np.int32)
1220
+
1221
+ if self.return_depth:
1222
+ depths = [self.load_depth(sequence, img_ids[0], is_right)]
1223
+ else:
1224
+ depths = []
1225
+
1226
+ if self.return_3d_bboxes:
1227
+ bboxes_3d = [self.get_3d_bboxes(sequence, img_ids[0], poses[0], projs[0])]
1228
+ else:
1229
+ bboxes_3d = []
1230
+
1231
+ if self.return_segmentation:
1232
+ segs = [self.load_segmentation(sequence, img_ids[0])]
1233
+ else:
1234
+ segs = []
1235
+
1236
+ if self.kitti_velodyn:
1237
+ is_occupied, is_visible = self.load_occ(sequence, id, poses)
1238
+ else:
1239
+ is_occupied, is_visible = [], []
1240
+
1241
+ _proc_time = np.array(time.time() - _start_time)
1242
+
1243
+ # print(_loading_time, _processing_time, _proc_time)
1244
+
1245
+ data = {
1246
+ "imgs": imgs,
1247
+ "projs": projs,
1248
+ "poses": poses,
1249
+ "depths": depths,
1250
+ "ts": ids,
1251
+ "3d_bboxes": bboxes_3d,
1252
+ "segs": segs,
1253
+ "is_occupied": is_occupied,
1254
+ "is_visible": is_visible,
1255
+ "t__get_item__": np.array([_proc_time]),
1256
+ "index": np.array([index]),
1257
+ }
1258
+
1259
+ return data
1260
+
1261
+ def __len__(self) -> int:
1262
+ return self.length
datasets/kitti_360/labels.py ADDED
@@ -0,0 +1,200 @@
1
+ #!/usr/bin/python
2
+ #
3
+ # KITTI-360 labels
4
+ #
5
+
6
+ from collections import namedtuple
7
+
8
+
9
+ #--------------------------------------------------------------------------------
10
+ # Definitions
11
+ #--------------------------------------------------------------------------------
12
+
13
+ # a label and all meta information
14
+ Label = namedtuple( 'Label' , [
15
+
16
+ 'name' , # The identifier of this label, e.g. 'car', 'person', ... .
17
+ # We use them to uniquely name a class
18
+
19
+ 'id' , # An integer ID that is associated with this label.
20
+ # The IDs are used to represent the label in ground truth images
21
+ # An ID of -1 means that this label does not have an ID and thus
22
+ # is ignored when creating ground truth images (e.g. license plate).
23
+ # Do not modify these IDs, since exactly these IDs are expected by the
24
+ # evaluation server.
25
+
26
+ 'kittiId' , # An integer ID that is associated with this label for KITTI-360
27
+ # NOT FOR RELEASING
28
+
29
+ 'trainId' , # Feel free to modify these IDs as suitable for your method. Then create
30
+ # ground truth images with train IDs, using the tools provided in the
31
+ # 'preparation' folder. However, make sure to validate or submit results
32
+ # to our evaluation server using the regular IDs above!
33
+ # For trainIds, multiple labels might have the same ID. Then, these labels
34
+ # are mapped to the same class in the ground truth images. For the inverse
35
+ # mapping, we use the label that is defined first in the list below.
36
+ # For example, mapping all void-type classes to the same ID in training,
37
+ # might make sense for some approaches.
38
+ # Max value is 255!
39
+
40
+ 'category' , # The name of the category that this label belongs to
41
+
42
+ 'categoryId' , # The ID of this category. Used to create ground truth images
43
+ # on category level.
44
+
45
+ 'hasInstances', # Whether this label distinguishes between single instances or not
46
+
47
+ 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored
48
+ # during evaluations or not
49
+
50
+ 'ignoreInInst', # Whether pixels having this class as ground truth label are ignored
51
+ # during evaluations of instance segmentation or not
52
+
53
+ 'color' , # The color of this label
54
+ ] )
55
+
56
+
57
+ #--------------------------------------------------------------------------------
58
+ # A list of all labels
59
+ #--------------------------------------------------------------------------------
60
+
61
+ # Please adapt the train IDs as appropriate for your approach.
62
+ # Note that you might want to ignore labels with ID 255 during training.
63
+ # Further note that the current train IDs are only a suggestion. You can use whatever you like.
64
+ # Make sure to provide your results using the original IDs and not the training IDs.
65
+ # Note that many IDs are ignored in evaluation and thus you never need to predict these!
66
+
67
+ labels = [
68
+ # name id kittiId, trainId category catId hasInstances ignoreInEval ignoreInInst color
69
+ Label( 'unlabeled' , 0 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
70
+ Label( 'ego vehicle' , 1 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
71
+ Label( 'rectification border' , 2 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
72
+ Label( 'out of roi' , 3 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
73
+ Label( 'static' , 4 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
74
+ Label( 'dynamic' , 5 , -1 , 255 , 'void' , 0 , False , True , True , (111, 74, 0) ),
75
+ Label( 'ground' , 6 , -1 , 255 , 'void' , 0 , False , True , True , ( 81, 0, 81) ),
76
+ Label( 'road' , 7 , 1 , 0 , 'flat' , 1 , False , False , False , (128, 64,128) ),
77
+ Label( 'sidewalk' , 8 , 3 , 1 , 'flat' , 1 , False , False , False , (244, 35,232) ),
78
+ Label( 'parking' , 9 , 2 , 255 , 'flat' , 1 , False , True , True , (250,170,160) ),
79
+ Label( 'rail track' , 10 , 10, 255 , 'flat' , 1 , False , True , True , (230,150,140) ),
80
+ Label( 'building' , 11 , 11, 2 , 'construction' , 2 , True , False , False , ( 70, 70, 70) ),
81
+ Label( 'wall' , 12 , 7 , 3 , 'construction' , 2 , False , False , False , (102,102,156) ),
82
+ Label( 'fence' , 13 , 8 , 4 , 'construction' , 2 , False , False , False , (190,153,153) ),
83
+ Label( 'guard rail' , 14 , 30, 255 , 'construction' , 2 , False , True , True , (180,165,180) ),
84
+ Label( 'bridge' , 15 , 31, 255 , 'construction' , 2 , False , True , True , (150,100,100) ),
85
+ Label( 'tunnel' , 16 , 32, 255 , 'construction' , 2 , False , True , True , (150,120, 90) ),
86
+ Label( 'pole' , 17 , 21, 5 , 'object' , 3 , True , False , True , (153,153,153) ),
87
+ Label( 'polegroup' , 18 , -1 , 255 , 'object' , 3 , False , True , True , (153,153,153) ),
88
+ Label( 'traffic light' , 19 , 23, 6 , 'object' , 3 , True , False , True , (250,170, 30) ),
89
+ Label( 'traffic sign' , 20 , 24, 7 , 'object' , 3 , True , False , True , (220,220, 0) ),
90
+ Label( 'vegetation' , 21 , 5 , 8 , 'nature' , 4 , False , False , False , (107,142, 35) ),
91
+ Label( 'terrain' , 22 , 4 , 9 , 'nature' , 4 , False , False , False , (152,251,152) ),
92
+ Label( 'sky' , 23 , 9 , 10 , 'sky' , 5 , False , False , False , ( 70,130,180) ),
93
+ Label( 'person' , 24 , 19, 11 , 'human' , 6 , True , False , False , (220, 20, 60) ),
94
+ Label( 'rider' , 25 , 20, 12 , 'human' , 6 , True , False , False , (255, 0, 0) ),
95
+ Label( 'car' , 26 , 13, 13 , 'vehicle' , 7 , True , False , False , ( 0, 0,142) ),
96
+ Label( 'truck' , 27 , 14, 14 , 'vehicle' , 7 , True , False , False , ( 0, 0, 70) ),
97
+ Label( 'bus' , 28 , 34, 15 , 'vehicle' , 7 , True , False , False , ( 0, 60,100) ),
98
+ Label( 'caravan' , 29 , 16, 255 , 'vehicle' , 7 , True , True , True , ( 0, 0, 90) ),
99
+ Label( 'trailer' , 30 , 15, 255 , 'vehicle' , 7 , True , True , True , ( 0, 0,110) ),
100
+ Label( 'train' , 31 , 33, 16 , 'vehicle' , 7 , True , False , False , ( 0, 80,100) ),
101
+ Label( 'motorcycle' , 32 , 17, 17 , 'vehicle' , 7 , True , False , False , ( 0, 0,230) ),
102
+ Label( 'bicycle' , 33 , 18, 18 , 'vehicle' , 7 , True , False , False , (119, 11, 32) ),
103
+ Label( 'garage' , 34 , 12, 2 , 'construction' , 2 , True , True , True , ( 64,128,128) ),
104
+ Label( 'gate' , 35 , 6 , 4 , 'construction' , 2 , False , True , True , (190,153,153) ),
105
+ Label( 'stop' , 36 , 29, 255 , 'construction' , 2 , True , True , True , (150,120, 90) ),
106
+ Label( 'smallpole' , 37 , 22, 5 , 'object' , 3 , True , True , True , (153,153,153) ),
107
+ Label( 'lamp' , 38 , 25, 255 , 'object' , 3 , True , True , True , (0, 64, 64) ),
108
+ Label( 'trash bin' , 39 , 26, 255 , 'object' , 3 , True , True , True , (0, 128,192) ),
109
+ Label( 'vending machine' , 40 , 27, 255 , 'object' , 3 , True , True , True , (128, 64, 0) ),
110
+ Label( 'box' , 41 , 28, 255 , 'object' , 3 , True , True , True , (64, 64,128) ),
111
+ Label( 'unknown construction' , 42 , 35, 255 , 'void' , 0 , False , True , True , (102, 0, 0) ),
112
+ Label( 'unknown vehicle' , 43 , 36, 255 , 'void' , 0 , False , True , True , ( 51, 0, 51) ),
113
+ Label( 'unknown object' , 44 , 37, 255 , 'void' , 0 , False , True , True , ( 32, 32, 32) ),
114
+ Label( 'license plate' , -1 , -1, -1 , 'vehicle' , 7 , False , True , True , ( 0, 0,142) ),
115
+ ]
116
+
117
+ #--------------------------------------------------------------------------------
118
+ # Create dictionaries for a fast lookup
119
+ #--------------------------------------------------------------------------------
120
+
121
+ # Please refer to the main method below for example usages!
122
+
123
+ # name to label object
124
+ name2label = { label.name : label for label in labels }
125
+ # id to label object
126
+ id2label = { label.id : label for label in labels }
127
+ # trainId to label object
128
+ trainId2label = { label.trainId : label for label in reversed(labels) }
129
+ # KITTI-360 ID to cityscapes ID
130
+ kittiId2label = { label.kittiId : label for label in labels }
131
+ # category to list of label objects
132
+ category2labels = {}
133
+ for label in labels:
134
+ category = label.category
135
+ if category in category2labels:
136
+ category2labels[category].append(label)
137
+ else:
138
+ category2labels[category] = [label]
139
+
140
+ #--------------------------------------------------------------------------------
141
+ # Assure single instance name
142
+ #--------------------------------------------------------------------------------
143
+
144
+ # returns the label name that describes a single instance (if possible)
145
+ # e.g. input | output
146
+ # ----------------------
147
+ # car | car
148
+ # cargroup | car
149
+ # foo | None
150
+ # foogroup | None
151
+ # skygroup | None
152
+ def assureSingleInstanceName( name ):
153
+ # if the name is known, it is not a group
154
+ if name in name2label:
155
+ return name
156
+ # test if the name actually denotes a group
157
+ if not name.endswith("group"):
158
+ return None
159
+ # remove group
160
+ name = name[:-len("group")]
161
+ # test if the new name exists
162
+ if not name in name2label:
163
+ return None
164
+ # test if the new name denotes a label that actually has instances
165
+ if not name2label[name].hasInstances:
166
+ return None
167
+ # all good then
168
+ return name
169
+
170
+ #--------------------------------------------------------------------------------
171
+ # Main for testing
172
+ #--------------------------------------------------------------------------------
173
+
174
+ # just a dummy main
175
+ if __name__ == "__main__":
176
+ # Print all the labels
177
+ print("List of KITTI-360 labels:")
178
+ print("")
179
+ print(" {:>21} | {:>3} | {:>7} | {:>14} | {:>10} | {:>12} | {:>12}".format( 'name', 'id', 'trainId', 'category', 'categoryId', 'hasInstances', 'ignoreInEval' ))
180
+ print(" " + ('-' * 98))
181
+ for label in labels:
182
+ # print(" {:>21} | {:>3} | {:>7} | {:>14} | {:>10} | {:>12} | {:>12}".format( label.name, label.id, label.trainId, label.category, label.categoryId, label.hasInstances, label.ignoreInEval ))
183
+ print(" \"{:}\"".format(label.name))
184
+ print("")
185
+
186
+ print("Example usages:")
187
+
188
+ # Map from name to label
189
+ name = 'car'
190
+ id = name2label[name].id
191
+ print("ID of label '{name}': {id}".format( name=name, id=id ))
192
+
193
+ # Map from ID to label
194
+ category = id2label[id].category
195
+ print("Category of label with ID '{id}': {category}".format( id=id, category=category ))
196
+
197
+ # Map from trainID to label
198
+ trainId = 0
199
+ name = trainId2label[trainId].name
200
+ print("Name of label with trainID '{id}': {name}".format( id=trainId, name=name ))
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0000_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0002_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0003_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0004_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0005_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0006_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0007_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0009_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0010_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/preprocess_kitti_360.py ADDED
@@ -0,0 +1,81 @@
1
+ import argparse
2
+ import sys
3
+ sys.path.append(".")
4
+
5
+ from pathlib import Path
6
+ import subprocess
7
+
8
+ import cv2
9
+ import numpy as np
10
+ from tqdm import tqdm
11
+
12
+ from datasets.kitti_360.kitti_360_dataset import Kitti360Dataset
13
+
14
+ DRY_RUN = False
15
+
16
+
17
+ def main():
18
+ parser = argparse.ArgumentParser("KITTI 360 Preprocessing")
19
+ parser.add_argument("--data_path", "-d", type=str)
20
+ parser.add_argument("--resolution", "-r", default=(192, 640))
21
+ parser.add_argument("--fisheye_rotation", "-f", default=(0, -15))
22
+ parser.add_argument("--only_fisheye", "-o", action="store_true")
23
+
24
+ args = parser.parse_args()
25
+
26
+ data_path = Path(args.data_path)
27
+ resolution = args.resolution
28
+ rotation = args.fisheye_rotation
29
+ only_fisheye = args.only_fisheye
30
+
31
+ print("Setting up dataset")
32
+ dataset = Kitti360Dataset(
33
+ data_path=data_path,
34
+ pose_path=data_path / "data_poses",
35
+ split_path=None,
36
+ return_stereo=True,
37
+ frame_count=1,
38
+ fisheye_rotation=rotation,
39
+ color_aug=False,
40
+ return_segmentation=False,
41
+ )
42
+
43
+ print("Setting up folders...")
44
+
45
+ for i in tqdm(range(len(dataset))):
46
+ sequence, id, is_right = dataset._datapoints[i]
47
+
48
+ if is_right:
49
+ continue
50
+
51
+ image_00 = data_path / "data_2d_raw" / sequence / "image_00" / f"data_{resolution[0]}x{resolution[1]}"
52
+ image_01 = data_path / "data_2d_raw" / sequence / "image_01" / f"data_{resolution[0]}x{resolution[1]}"
53
+ image_02 = data_path / "data_2d_raw" / sequence / "image_02" / f"data_{resolution[0]}x{resolution[1]}_{rotation[0]}x{rotation[1]}"
54
+ image_03 = data_path / "data_2d_raw" / sequence / "image_03" / f"data_{resolution[0]}x{resolution[1]}_{rotation[0]}x{rotation[1]}"
55
+
56
+ img_id = dataset._img_ids[sequence][id]
57
+
58
+ if (image_00 / f"{img_id:010d}.png").exists():
59
+ continue
60
+
61
+ data = dataset[i]
62
+
63
+ image_00.mkdir(exist_ok=True, parents=True)
64
+ image_01.mkdir(exist_ok=True, parents=True)
65
+ image_02.mkdir(exist_ok=True, parents=True)
66
+ image_03.mkdir(exist_ok=True, parents=True)
67
+
68
+ img_00 = (np.transpose(data["imgs"][0].numpy(), (1, 2, 0)) * .5 + .5) * 255.
69
+ img_01 = (np.transpose(data["imgs"][1].numpy(), (1, 2, 0)) * .5 + .5) * 255.
70
+ img_02 = (np.transpose(data["imgs"][2].numpy(), (1, 2, 0)) * .5 + .5) * 255.
71
+ img_03 = (np.transpose(data["imgs"][3].numpy(), (1, 2, 0)) * .5 + .5) * 255.
72
+
73
+ if not only_fisheye:
74
+ cv2.imwrite(str(image_00 / f"{img_id:010d}.png"), cv2.cvtColor(img_00, cv2.COLOR_RGB2BGR))
75
+ cv2.imwrite(str(image_01 / f"{img_id:010d}.png"), cv2.cvtColor(img_01, cv2.COLOR_RGB2BGR))
76
+ cv2.imwrite(str(image_02 / f"{img_id:010d}.png"), cv2.cvtColor(img_02, cv2.COLOR_RGB2BGR))
77
+ cv2.imwrite(str(image_03 / f"{img_id:010d}.png"), cv2.cvtColor(img_03, cv2.COLOR_RGB2BGR))
78
+
79
+
80
+ if __name__ == "__main__":
81
+ main()
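Example invocation (the data path is a placeholder); with the defaults above this writes 192x640 resampled images next to the raw data under data_2d_raw:

    python datasets/kitti_360/preprocess_kitti_360.py --data_path /path/to/KITTI-360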
datasets/kitti_360/splits/seg/test_files.txt ADDED
@@ -0,0 +1,446 @@
1
+ 2013_05_28_drive_0000_sync 0000000386 l
2
+ 2013_05_28_drive_0000_sync 0000000406 l
3
+ 2013_05_28_drive_0000_sync 0000000426 l
4
+ 2013_05_28_drive_0000_sync 0000000446 l
5
+ 2013_05_28_drive_0000_sync 0000000466 l
6
+ 2013_05_28_drive_0000_sync 0000000486 l
7
+ 2013_05_28_drive_0000_sync 0000000506 l
8
+ 2013_05_28_drive_0000_sync 0000000546 l
9
+ 2013_05_28_drive_0000_sync 0000000606 l
10
+ 2013_05_28_drive_0000_sync 0000000626 l
11
+ 2013_05_28_drive_0000_sync 0000000646 l
12
+ 2013_05_28_drive_0000_sync 0000000666 l
13
+ 2013_05_28_drive_0000_sync 0000000706 l
14
+ 2013_05_28_drive_0000_sync 0000000726 l
15
+ 2013_05_28_drive_0000_sync 0000000746 l
16
+ 2013_05_28_drive_0000_sync 0000000786 l
17
+ 2013_05_28_drive_0000_sync 0000000806 l
18
+ 2013_05_28_drive_0000_sync 0000000826 l
19
+ 2013_05_28_drive_0000_sync 0000000846 l
20
+ 2013_05_28_drive_0000_sync 0000000866 l
21
+ 2013_05_28_drive_0000_sync 0000000886 l
22
+ 2013_05_28_drive_0000_sync 0000000906 l
23
+ 2013_05_28_drive_0000_sync 0000000926 l
24
+ 2013_05_28_drive_0000_sync 0000000946 l
25
+ 2013_05_28_drive_0000_sync 0000000966 l
26
+ 2013_05_28_drive_0000_sync 0000000986 l
27
+ 2013_05_28_drive_0000_sync 0000001054 l
28
+ 2013_05_28_drive_0000_sync 0000001111 l
29
+ 2013_05_28_drive_0000_sync 0000001138 l
30
+ 2013_05_28_drive_0000_sync 0000001158 l
31
+ 2013_05_28_drive_0000_sync 0000001178 l
32
+ 2013_05_28_drive_0000_sync 0000001198 l
33
+ 2013_05_28_drive_0000_sync 0000001218 l
34
+ 2013_05_28_drive_0000_sync 0000001238 l
35
+ 2013_05_28_drive_0000_sync 0000001258 l
36
+ 2013_05_28_drive_0000_sync 0000001278 l
37
+ 2013_05_28_drive_0000_sync 0000001298 l
38
+ 2013_05_28_drive_0000_sync 0000001338 l
39
+ 2013_05_28_drive_0000_sync 0000001358 l
40
+ 2013_05_28_drive_0000_sync 0000001378 l
41
+ 2013_05_28_drive_0000_sync 0000001398 l
42
+ 2013_05_28_drive_0000_sync 0000001418 l
43
+ 2013_05_28_drive_0000_sync 0000001438 l
44
+ 2013_05_28_drive_0000_sync 0000001458 l
45
+ 2013_05_28_drive_0000_sync 0000001478 l
46
+ 2013_05_28_drive_0000_sync 0000001498 l
47
+ 2013_05_28_drive_0000_sync 0000001518 l
48
+ 2013_05_28_drive_0000_sync 0000001538 l
49
+ 2013_05_28_drive_0000_sync 0000001558 l
50
+ 2013_05_28_drive_0000_sync 0000001578 l
51
+ 2013_05_28_drive_0000_sync 0000001598 l
52
+ 2013_05_28_drive_0000_sync 0000001618 l
53
+ 2013_05_28_drive_0000_sync 0000001638 l
54
+ 2013_05_28_drive_0000_sync 0000001658 l
55
+ 2013_05_28_drive_0000_sync 0000001678 l
56
+ 2013_05_28_drive_0000_sync 0000001698 l
57
+ 2013_05_28_drive_0000_sync 0000001718 l
58
+ 2013_05_28_drive_0000_sync 0000001738 l
59
+ 2013_05_28_drive_0000_sync 0000001758 l
60
+ 2013_05_28_drive_0000_sync 0000001778 l
61
+ 2013_05_28_drive_0000_sync 0000001798 l
62
+ 2013_05_28_drive_0000_sync 0000001818 l
63
+ 2013_05_28_drive_0000_sync 0000001858 l
64
+ 2013_05_28_drive_0000_sync 0000001878 l
65
+ 2013_05_28_drive_0000_sync 0000001898 l
66
+ 2013_05_28_drive_0000_sync 0000001918 l
67
+ 2013_05_28_drive_0000_sync 0000001938 l
68
+ 2013_05_28_drive_0000_sync 0000001958 l
69
+ 2013_05_28_drive_0000_sync 0000001978 l
70
+ 2013_05_28_drive_0000_sync 0000002725 l
71
+ 2013_05_28_drive_0000_sync 0000002745 l
72
+ 2013_05_28_drive_0000_sync 0000002765 l
73
+ 2013_05_28_drive_0000_sync 0000002785 l
74
+ 2013_05_28_drive_0000_sync 0000002805 l
75
+ 2013_05_28_drive_0000_sync 0000002825 l
76
+ 2013_05_28_drive_0000_sync 0000002845 l
77
+ 2013_05_28_drive_0000_sync 0000002865 l
78
+ 2013_05_28_drive_0000_sync 0000002885 l
79
+ 2013_05_28_drive_0000_sync 0000002905 l
80
+ 2013_05_28_drive_0000_sync 0000003266 l
81
+ 2013_05_28_drive_0000_sync 0000003286 l
82
+ 2013_05_28_drive_0000_sync 0000003306 l
83
+ 2013_05_28_drive_0000_sync 0000003326 l
84
+ 2013_05_28_drive_0000_sync 0000003346 l
85
+ 2013_05_28_drive_0000_sync 0000003366 l
86
+ 2013_05_28_drive_0000_sync 0000003386 l
87
+ 2013_05_28_drive_0000_sync 0000003406 l
88
+ 2013_05_28_drive_0000_sync 0000003426 l
89
+ 2013_05_28_drive_0000_sync 0000003446 l
90
+ 2013_05_28_drive_0000_sync 0000003466 l
91
+ 2013_05_28_drive_0000_sync 0000003486 l
92
+ 2013_05_28_drive_0000_sync 0000003506 l
93
+ 2013_05_28_drive_0000_sync 0000003526 l
94
+ 2013_05_28_drive_0000_sync 0000003546 l
95
+ 2013_05_28_drive_0000_sync 0000003566 l
96
+ 2013_05_28_drive_0000_sync 0000003586 l
97
+ 2013_05_28_drive_0000_sync 0000003606 l
98
+ 2013_05_28_drive_0000_sync 0000003626 l
99
+ 2013_05_28_drive_0000_sync 0000003666 l
100
+ 2013_05_28_drive_0000_sync 0000003686 l
101
+ 2013_05_28_drive_0000_sync 0000003706 l
102
+ 2013_05_28_drive_0000_sync 0000003726 l
103
+ 2013_05_28_drive_0000_sync 0000003746 l
104
+ 2013_05_28_drive_0000_sync 0000003766 l
105
+ 2013_05_28_drive_0000_sync 0000003786 l
106
+ 2013_05_28_drive_0000_sync 0000003806 l
107
+ 2013_05_28_drive_0000_sync 0000003826 l
108
+ 2013_05_28_drive_0000_sync 0000003846 l
109
+ 2013_05_28_drive_0000_sync 0000003886 l
110
+ 2013_05_28_drive_0000_sync 0000003906 l
111
+ 2013_05_28_drive_0002_sync 0000004618 l
112
+ 2013_05_28_drive_0002_sync 0000004638 l
113
+ 2013_05_28_drive_0002_sync 0000004658 l
114
+ 2013_05_28_drive_0002_sync 0000004678 l
115
+ 2013_05_28_drive_0002_sync 0000004698 l
116
+ 2013_05_28_drive_0002_sync 0000004718 l
117
+ 2013_05_28_drive_0002_sync 0000004738 l
118
+ 2013_05_28_drive_0002_sync 0000004758 l
119
+ 2013_05_28_drive_0002_sync 0000004778 l
120
+ 2013_05_28_drive_0002_sync 0000004798 l
121
+ 2013_05_28_drive_0002_sync 0000004818 l
122
+ 2013_05_28_drive_0002_sync 0000004838 l
123
+ 2013_05_28_drive_0002_sync 0000004858 l
124
+ 2013_05_28_drive_0002_sync 0000004878 l
125
+ 2013_05_28_drive_0002_sync 0000004898 l
126
+ 2013_05_28_drive_0002_sync 0000004918 l
127
+ 2013_05_28_drive_0002_sync 0000004938 l
128
+ 2013_05_28_drive_0002_sync 0000004958 l
129
+ 2013_05_28_drive_0002_sync 0000004978 l
130
+ 2013_05_28_drive_0002_sync 0000005006 l
131
+ 2013_05_28_drive_0002_sync 0000005086 l
132
+ 2013_05_28_drive_0002_sync 0000005106 l
133
+ 2013_05_28_drive_0002_sync 0000005126 l
134
+ 2013_05_28_drive_0002_sync 0000005146 l
135
+ 2013_05_28_drive_0002_sync 0000005166 l
136
+ 2013_05_28_drive_0002_sync 0000005186 l
137
+ 2013_05_28_drive_0002_sync 0000005206 l
138
+ 2013_05_28_drive_0002_sync 0000005226 l
139
+ 2013_05_28_drive_0002_sync 0000005246 l
140
+ 2013_05_28_drive_0002_sync 0000005266 l
141
+ 2013_05_28_drive_0002_sync 0000005286 l
142
+ 2013_05_28_drive_0002_sync 0000005306 l
143
+ 2013_05_28_drive_0002_sync 0000005326 l
144
+ 2013_05_28_drive_0002_sync 0000005366 l
145
+ 2013_05_28_drive_0002_sync 0000005406 l
146
+ 2013_05_28_drive_0002_sync 0000005426 l
147
+ 2013_05_28_drive_0002_sync 0000005446 l
148
+ 2013_05_28_drive_0002_sync 0000005466 l
149
+ 2013_05_28_drive_0002_sync 0000005486 l
150
+ 2013_05_28_drive_0002_sync 0000005506 l
151
+ 2013_05_28_drive_0002_sync 0000005526 l
152
+ 2013_05_28_drive_0002_sync 0000005546 l
153
+ 2013_05_28_drive_0002_sync 0000005566 l
154
+ 2013_05_28_drive_0002_sync 0000005586 l
155
+ 2013_05_28_drive_0002_sync 0000005661 l
156
+ 2013_05_28_drive_0002_sync 0000005782 l
157
+ 2013_05_28_drive_0002_sync 0000005802 l
158
+ 2013_05_28_drive_0002_sync 0000005822 l
159
+ 2013_05_28_drive_0002_sync 0000006002 l
160
+ 2013_05_28_drive_0002_sync 0000006062 l
161
+ 2013_05_28_drive_0002_sync 0000006082 l
162
+ 2013_05_28_drive_0002_sync 0000006102 l
163
+ 2013_05_28_drive_0002_sync 0000006122 l
164
+ 2013_05_28_drive_0002_sync 0000006222 l
165
+ 2013_05_28_drive_0002_sync 0000006242 l
166
+ 2013_05_28_drive_0002_sync 0000006262 l
167
+ 2013_05_28_drive_0002_sync 0000006282 l
168
+ 2013_05_28_drive_0002_sync 0000006362 l
169
+ 2013_05_28_drive_0002_sync 0000006382 l
170
+ 2013_05_28_drive_0002_sync 0000015219 l
171
+ 2013_05_28_drive_0002_sync 0000015239 l
172
+ 2013_05_28_drive_0002_sync 0000015259 l
173
+ 2013_05_28_drive_0002_sync 0000015319 l
174
+ 2013_05_28_drive_0003_sync 0000000182 l
175
+ 2013_05_28_drive_0003_sync 0000000262 l
176
+ 2013_05_28_drive_0004_sync 0000002922 l
177
+ 2013_05_28_drive_0004_sync 0000003002 l
178
+ 2013_05_28_drive_0004_sync 0000003022 l
179
+ 2013_05_28_drive_0004_sync 0000003142 l
180
+ 2013_05_28_drive_0004_sync 0000003162 l
181
+ 2013_05_28_drive_0004_sync 0000003182 l
182
+ 2013_05_28_drive_0004_sync 0000003202 l
183
+ 2013_05_28_drive_0004_sync 0000003222 l
184
+ 2013_05_28_drive_0004_sync 0000003242 l
185
+ 2013_05_28_drive_0004_sync 0000003262 l
186
+ 2013_05_28_drive_0004_sync 0000003282 l
187
+ 2013_05_28_drive_0004_sync 0000003302 l
188
+ 2013_05_28_drive_0004_sync 0000003322 l
189
+ 2013_05_28_drive_0004_sync 0000003342 l
190
+ 2013_05_28_drive_0004_sync 0000003362 l
191
+ 2013_05_28_drive_0004_sync 0000003382 l
192
+ 2013_05_28_drive_0004_sync 0000003402 l
193
+ 2013_05_28_drive_0004_sync 0000003422 l
194
+ 2013_05_28_drive_0004_sync 0000003442 l
195
+ 2013_05_28_drive_0004_sync 0000003462 l
196
+ 2013_05_28_drive_0004_sync 0000003542 l
197
+ 2013_05_28_drive_0004_sync 0000003562 l
198
+ 2013_05_28_drive_0004_sync 0000003582 l
199
+ 2013_05_28_drive_0004_sync 0000003602 l
200
+ 2013_05_28_drive_0004_sync 0000003622 l
201
+ 2013_05_28_drive_0004_sync 0000003642 l
202
+ 2013_05_28_drive_0004_sync 0000003662 l
203
+ 2013_05_28_drive_0004_sync 0000003682 l
204
+ 2013_05_28_drive_0004_sync 0000003797 l
205
+ 2013_05_28_drive_0004_sync 0000003825 l
206
+ 2013_05_28_drive_0004_sync 0000003845 l
207
+ 2013_05_28_drive_0004_sync 0000003865 l
208
+ 2013_05_28_drive_0004_sync 0000003885 l
209
+ 2013_05_28_drive_0004_sync 0000003905 l
210
+ 2013_05_28_drive_0004_sync 0000003925 l
211
+ 2013_05_28_drive_0004_sync 0000003945 l
212
+ 2013_05_28_drive_0004_sync 0000003965 l
213
+ 2013_05_28_drive_0004_sync 0000004399 l
214
+ 2013_05_28_drive_0004_sync 0000004439 l
215
+ 2013_05_28_drive_0004_sync 0000004459 l
216
+ 2013_05_28_drive_0004_sync 0000004479 l
217
+ 2013_05_28_drive_0004_sync 0000004499 l
218
+ 2013_05_28_drive_0004_sync 0000004539 l
219
+ 2013_05_28_drive_0004_sync 0000004559 l
220
+ 2013_05_28_drive_0004_sync 0000004596 l
221
+ 2013_05_28_drive_0004_sync 0000004616 l
222
+ 2013_05_28_drive_0004_sync 0000004636 l
223
+ 2013_05_28_drive_0004_sync 0000004656 l
224
+ 2013_05_28_drive_0004_sync 0000004696 l
225
+ 2013_05_28_drive_0004_sync 0000004717 l
226
+ 2013_05_28_drive_0004_sync 0000004737 l
227
+ 2013_05_28_drive_0004_sync 0000004897 l
228
+ 2013_05_28_drive_0004_sync 0000004917 l
229
+ 2013_05_28_drive_0005_sync 0000004806 l
230
+ 2013_05_28_drive_0005_sync 0000004826 l
231
+ 2013_05_28_drive_0005_sync 0000004846 l
232
+ 2013_05_28_drive_0005_sync 0000004866 l
233
+ 2013_05_28_drive_0005_sync 0000004886 l
234
+ 2013_05_28_drive_0005_sync 0000004906 l
235
+ 2013_05_28_drive_0005_sync 0000004926 l
236
+ 2013_05_28_drive_0005_sync 0000004946 l
237
+ 2013_05_28_drive_0005_sync 0000004986 l
238
+ 2013_05_28_drive_0005_sync 0000005006 l
239
+ 2013_05_28_drive_0005_sync 0000005026 l
240
+ 2013_05_28_drive_0005_sync 0000005046 l
241
+ 2013_05_28_drive_0005_sync 0000005086 l
242
+ 2013_05_28_drive_0005_sync 0000005157 l
243
+ 2013_05_28_drive_0005_sync 0000005190 l
244
+ 2013_05_28_drive_0005_sync 0000005210 l
245
+ 2013_05_28_drive_0005_sync 0000005569 l
+ 2013_05_28_drive_0005_sync 0000005589 l
+ 2013_05_28_drive_0005_sync 0000005649 l
+ 2013_05_28_drive_0005_sync 0000005669 l
+ 2013_05_28_drive_0005_sync 0000005689 l
+ 2013_05_28_drive_0005_sync 0000005709 l
+ 2013_05_28_drive_0005_sync 0000005729 l
+ 2013_05_28_drive_0005_sync 0000005749 l
+ 2013_05_28_drive_0005_sync 0000005769 l
+ 2013_05_28_drive_0005_sync 0000005809 l
+ 2013_05_28_drive_0005_sync 0000005829 l
+ 2013_05_28_drive_0005_sync 0000005883 l
+ 2013_05_28_drive_0005_sync 0000005971 l
+ 2013_05_28_drive_0005_sync 0000005991 l
+ 2013_05_28_drive_0005_sync 0000006011 l
+ 2013_05_28_drive_0005_sync 0000006031 l
+ 2013_05_28_drive_0005_sync 0000006051 l
+ 2013_05_28_drive_0005_sync 0000006071 l
+ 2013_05_28_drive_0005_sync 0000006131 l
+ 2013_05_28_drive_0005_sync 0000006151 l
+ 2013_05_28_drive_0005_sync 0000006211 l
+ 2013_05_28_drive_0005_sync 0000006251 l
+ 2013_05_28_drive_0005_sync 0000006271 l
+ 2013_05_28_drive_0006_sync 0000000130 l
+ 2013_05_28_drive_0006_sync 0000000150 l
+ 2013_05_28_drive_0006_sync 0000000170 l
+ 2013_05_28_drive_0006_sync 0000000210 l
+ 2013_05_28_drive_0006_sync 0000000230 l
+ 2013_05_28_drive_0006_sync 0000000250 l
+ 2013_05_28_drive_0006_sync 0000000290 l
+ 2013_05_28_drive_0006_sync 0000000310 l
+ 2013_05_28_drive_0006_sync 0000000330 l
+ 2013_05_28_drive_0006_sync 0000000350 l
+ 2013_05_28_drive_0006_sync 0000000370 l
+ 2013_05_28_drive_0006_sync 0000000430 l
+ 2013_05_28_drive_0006_sync 0000000450 l
+ 2013_05_28_drive_0006_sync 0000000470 l
+ 2013_05_28_drive_0006_sync 0000000490 l
+ 2013_05_28_drive_0006_sync 0000000510 l
+ 2013_05_28_drive_0006_sync 0000000551 l
+ 2013_05_28_drive_0006_sync 0000000622 l
+ 2013_05_28_drive_0006_sync 0000000642 l
+ 2013_05_28_drive_0006_sync 0000000662 l
+ 2013_05_28_drive_0006_sync 0000000682 l
+ 2013_05_28_drive_0006_sync 0000000702 l
+ 2013_05_28_drive_0006_sync 0000000722 l
+ 2013_05_28_drive_0006_sync 0000000742 l
+ 2013_05_28_drive_0006_sync 0000000822 l
+ 2013_05_28_drive_0006_sync 0000000842 l
+ 2013_05_28_drive_0006_sync 0000000862 l
+ 2013_05_28_drive_0006_sync 0000000882 l
+ 2013_05_28_drive_0006_sync 0000000902 l
+ 2013_05_28_drive_0006_sync 0000000922 l
+ 2013_05_28_drive_0006_sync 0000000962 l
+ 2013_05_28_drive_0006_sync 0000000982 l
+ 2013_05_28_drive_0006_sync 0000001062 l
+ 2013_05_28_drive_0006_sync 0000001082 l
+ 2013_05_28_drive_0006_sync 0000001102 l
+ 2013_05_28_drive_0006_sync 0000001142 l
+ 2013_05_28_drive_0006_sync 0000001162 l
+ 2013_05_28_drive_0006_sync 0000001182 l
+ 2013_05_28_drive_0006_sync 0000001202 l
+ 2013_05_28_drive_0006_sync 0000002304 l
+ 2013_05_28_drive_0006_sync 0000002324 l
+ 2013_05_28_drive_0006_sync 0000002344 l
+ 2013_05_28_drive_0006_sync 0000002364 l
+ 2013_05_28_drive_0006_sync 0000002384 l
+ 2013_05_28_drive_0006_sync 0000002404 l
+ 2013_05_28_drive_0006_sync 0000002424 l
+ 2013_05_28_drive_0006_sync 0000002444 l
+ 2013_05_28_drive_0006_sync 0000002464 l
+ 2013_05_28_drive_0006_sync 0000002484 l
+ 2013_05_28_drive_0006_sync 0000002592 l
+ 2013_05_28_drive_0006_sync 0000002613 l
+ 2013_05_28_drive_0006_sync 0000002633 l
+ 2013_05_28_drive_0006_sync 0000002673 l
+ 2013_05_28_drive_0006_sync 0000002693 l
+ 2013_05_28_drive_0006_sync 0000002733 l
+ 2013_05_28_drive_0006_sync 0000002753 l
+ 2013_05_28_drive_0006_sync 0000002773 l
+ 2013_05_28_drive_0006_sync 0000002793 l
+ 2013_05_28_drive_0006_sync 0000009236 l
+ 2013_05_28_drive_0006_sync 0000009256 l
+ 2013_05_28_drive_0006_sync 0000009296 l
+ 2013_05_28_drive_0006_sync 0000009316 l
+ 2013_05_28_drive_0006_sync 0000009336 l
+ 2013_05_28_drive_0006_sync 0000009376 l
+ 2013_05_28_drive_0006_sync 0000009396 l
+ 2013_05_28_drive_0006_sync 0000009416 l
+ 2013_05_28_drive_0006_sync 0000009456 l
+ 2013_05_28_drive_0006_sync 0000009476 l
+ 2013_05_28_drive_0006_sync 0000009496 l
+ 2013_05_28_drive_0006_sync 0000009516 l
+ 2013_05_28_drive_0006_sync 0000009536 l
+ 2013_05_28_drive_0007_sync 0000000019 l
+ 2013_05_28_drive_0007_sync 0000000039 l
+ 2013_05_28_drive_0007_sync 0000000059 l
+ 2013_05_28_drive_0007_sync 0000000079 l
+ 2013_05_28_drive_0007_sync 0000000099 l
+ 2013_05_28_drive_0007_sync 0000000119 l
+ 2013_05_28_drive_0007_sync 0000000139 l
+ 2013_05_28_drive_0007_sync 0000000159 l
+ 2013_05_28_drive_0007_sync 0000000179 l
+ 2013_05_28_drive_0007_sync 0000000199 l
+ 2013_05_28_drive_0007_sync 0000000219 l
+ 2013_05_28_drive_0007_sync 0000000439 l
+ 2013_05_28_drive_0009_sync 0000001030 l
+ 2013_05_28_drive_0009_sync 0000001050 l
+ 2013_05_28_drive_0009_sync 0000001070 l
+ 2013_05_28_drive_0009_sync 0000001090 l
+ 2013_05_28_drive_0009_sync 0000001110 l
+ 2013_05_28_drive_0009_sync 0000001130 l
+ 2013_05_28_drive_0009_sync 0000001150 l
+ 2013_05_28_drive_0009_sync 0000001170 l
+ 2013_05_28_drive_0009_sync 0000001190 l
+ 2013_05_28_drive_0009_sync 0000001210 l
+ 2013_05_28_drive_0009_sync 0000001230 l
+ 2013_05_28_drive_0009_sync 0000001250 l
+ 2013_05_28_drive_0009_sync 0000001270 l
+ 2013_05_28_drive_0009_sync 0000001290 l
+ 2013_05_28_drive_0009_sync 0000001310 l
+ 2013_05_28_drive_0009_sync 0000001330 l
+ 2013_05_28_drive_0009_sync 0000001350 l
+ 2013_05_28_drive_0009_sync 0000001370 l
+ 2013_05_28_drive_0009_sync 0000004495 l
+ 2013_05_28_drive_0009_sync 0000004555 l
+ 2013_05_28_drive_0009_sync 0000004575 l
+ 2013_05_28_drive_0009_sync 0000004595 l
+ 2013_05_28_drive_0009_sync 0000004615 l
+ 2013_05_28_drive_0009_sync 0000004635 l
+ 2013_05_28_drive_0009_sync 0000004655 l
+ 2013_05_28_drive_0009_sync 0000004675 l
+ 2013_05_28_drive_0009_sync 0000004695 l
+ 2013_05_28_drive_0009_sync 0000004719 l
+ 2013_05_28_drive_0009_sync 0000004845 l
+ 2013_05_28_drive_0009_sync 0000004869 l
+ 2013_05_28_drive_0009_sync 0000004889 l
+ 2013_05_28_drive_0009_sync 0000005184 l
+ 2013_05_28_drive_0009_sync 0000005204 l
+ 2013_05_28_drive_0009_sync 0000005224 l
+ 2013_05_28_drive_0009_sync 0000005244 l
+ 2013_05_28_drive_0009_sync 0000005264 l
+ 2013_05_28_drive_0009_sync 0000005284 l
+ 2013_05_28_drive_0009_sync 0000005304 l
+ 2013_05_28_drive_0009_sync 0000005324 l
+ 2013_05_28_drive_0009_sync 0000005344 l
+ 2013_05_28_drive_0009_sync 0000005364 l
+ 2013_05_28_drive_0009_sync 0000005384 l
+ 2013_05_28_drive_0009_sync 0000005404 l
+ 2013_05_28_drive_0009_sync 0000005424 l
+ 2013_05_28_drive_0009_sync 0000005444 l
+ 2013_05_28_drive_0009_sync 0000005464 l
+ 2013_05_28_drive_0009_sync 0000005484 l
+ 2013_05_28_drive_0009_sync 0000005504 l
+ 2013_05_28_drive_0009_sync 0000005524 l
+ 2013_05_28_drive_0009_sync 0000005544 l
+ 2013_05_28_drive_0009_sync 0000005564 l
+ 2013_05_28_drive_0009_sync 0000005584 l
+ 2013_05_28_drive_0009_sync 0000005624 l
+ 2013_05_28_drive_0009_sync 0000005644 l
+ 2013_05_28_drive_0009_sync 0000005664 l
+ 2013_05_28_drive_0009_sync 0000005684 l
+ 2013_05_28_drive_0009_sync 0000005704 l
+ 2013_05_28_drive_0009_sync 0000006291 l
+ 2013_05_28_drive_0009_sync 0000006311 l
+ 2013_05_28_drive_0009_sync 0000006351 l
+ 2013_05_28_drive_0009_sync 0000006371 l
+ 2013_05_28_drive_0009_sync 0000006391 l
+ 2013_05_28_drive_0009_sync 0000006411 l
+ 2013_05_28_drive_0009_sync 0000006431 l
+ 2013_05_28_drive_0009_sync 0000006451 l
+ 2013_05_28_drive_0009_sync 0000006471 l
+ 2013_05_28_drive_0009_sync 0000006491 l
+ 2013_05_28_drive_0009_sync 0000006511 l
+ 2013_05_28_drive_0010_sync 0000001896 l
+ 2013_05_28_drive_0010_sync 0000001916 l
+ 2013_05_28_drive_0010_sync 0000001936 l
+ 2013_05_28_drive_0010_sync 0000001956 l
+ 2013_05_28_drive_0010_sync 0000001976 l
+ 2013_05_28_drive_0010_sync 0000001996 l
+ 2013_05_28_drive_0010_sync 0000002016 l
+ 2013_05_28_drive_0010_sync 0000002036 l
+ 2013_05_28_drive_0010_sync 0000002056 l
+ 2013_05_28_drive_0010_sync 0000002076 l
+ 2013_05_28_drive_0010_sync 0000002096 l
+ 2013_05_28_drive_0010_sync 0000002145 l
+ 2013_05_28_drive_0010_sync 0000002165 l
+ 2013_05_28_drive_0010_sync 0000002185 l
+ 2013_05_28_drive_0010_sync 0000002205 l
+ 2013_05_28_drive_0010_sync 0000002225 l
+ 2013_05_28_drive_0010_sync 0000002615 l
+ 2013_05_28_drive_0010_sync 0000002635 l
+ 2013_05_28_drive_0010_sync 0000002655 l
+ 2013_05_28_drive_0010_sync 0000002675 l
+ 2013_05_28_drive_0010_sync 0000002695 l
+ 2013_05_28_drive_0010_sync 0000002755 l
+ 2013_05_28_drive_0010_sync 0000002795 l
+ 2013_05_28_drive_0010_sync 0000002815 l
+ 2013_05_28_drive_0010_sync 0000002835 l
+ 2013_05_28_drive_0010_sync 0000002855 l
+ 2013_05_28_drive_0010_sync 0000002875 l
+ 2013_05_28_drive_0010_sync 0000002895 l
datasets/kitti_360/splits/seg/train_files.txt ADDED
The diff for this file is too large to render. See raw diff
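Note: each line of these split files appears to follow the pattern `<sequence> <frame_id> <camera_side>`, e.g. `2013_05_28_drive_0005_sync 0000005569 l` for the left camera of frame 5569 in drive 0005. Below is a minimal, hypothetical sketch of how such a split file could be parsed; the `SplitEntry`/`load_split` names are illustrative and not taken from this repository's actual loader (see `datasets/kitti_360/kitti_360_dataset.py` for the real implementation).

```python
# Hypothetical parser for split files such as
# datasets/kitti_360/splits/seg/test_files.txt.
# Assumed line format: "<sequence> <frame_id> <camera_side>".
from pathlib import Path
from typing import NamedTuple


class SplitEntry(NamedTuple):
    sequence: str  # KITTI-360 drive, e.g. "2013_05_28_drive_0005_sync"
    frame_id: int  # zero-padded frame index within the drive
    camera: str    # stereo camera side, e.g. "l" for left


def load_split(path: str) -> list[SplitEntry]:
    entries = []
    for line in Path(path).read_text().splitlines():
        if not line.strip():
            continue  # skip blank lines
        sequence, frame_id, camera = line.split()
        entries.append(SplitEntry(sequence, int(frame_id), camera))
    return entries


if __name__ == "__main__":
    split = load_split("datasets/kitti_360/splits/seg/test_files.txt")
    print(f"{len(split)} test frames, first entry: {split[0]}")
```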