jev-aleks committed
Commit 9e15541 · Parent(s): 975fa86

scenedino init

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .gitattributes +3 -0
  2. .gitignore +213 -0
  3. LICENSE.txt +201 -0
  4. README.md +2 -1
  5. app.py +213 -0
  6. configs/dataset/bdd_seg.yaml +2 -0
  7. configs/dataset/cityscapes_seg.yaml +2 -0
  8. configs/dataset/kitti_360_sscbench.yaml +15 -0
  9. configs/dataset/realestate10k.yaml +3 -0
  10. configs/downstream/semantic.yaml +13 -0
  11. configs/evaluate_semantic_bdd.yaml +50 -0
  12. configs/evaluate_semantic_cityscapes.yaml +50 -0
  13. configs/evaluate_semantic_kitti_360.yaml +50 -0
  14. configs/model/dino_downsampler.yaml +64 -0
  15. configs/model/dino_upsampler.yaml +64 -0
  16. configs/model/dinov2_downsampler.yaml +64 -0
  17. configs/renderer/pixelnerf.yaml +9 -0
  18. configs/train_scenedino_kitti_360.yaml +43 -0
  19. configs/train_scenedino_re10k.yaml +49 -0
  20. configs/train_semantic_kitti_360.yaml +52 -0
  21. configs/training/loss/scenedino.yaml +15 -0
  22. configs/training/loss/semantic.yaml +10 -0
  23. configs/training/optimizer/scenedino.yaml +7 -0
  24. configs/training/optimizer/semantic.yaml +7 -0
  25. configs/training/scenedino.yaml +21 -0
  26. configs/training/scheduler/scenedino.yaml +3 -0
  27. configs/training/semantic.yaml +16 -0
  28. configs/validation/scenedino.yaml +78 -0
  29. configs/validation/semantic.yaml +80 -0
  30. datasets/__init__.py +0 -0
  31. datasets/bdd/bdd_dataset.py +164 -0
  32. datasets/cityscapes/cityscapes_dataset.py +82 -0
  33. datasets/data_util.py +307 -0
  34. datasets/kitti_360/__init__.py +0 -0
  35. datasets/kitti_360/annotation.py +538 -0
  36. datasets/kitti_360/compute_kitti_360_bbox_split.py +110 -0
  37. datasets/kitti_360/kitti_360_dataset.py +1263 -0
  38. datasets/kitti_360/labels.py +200 -0
  39. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0000_sync/poses.txt +0 -0
  40. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0002_sync/poses.txt +0 -0
  41. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0003_sync/poses.txt +0 -0
  42. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0004_sync/poses.txt +0 -0
  43. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0005_sync/poses.txt +0 -0
  44. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0006_sync/poses.txt +0 -0
  45. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0007_sync/poses.txt +0 -0
  46. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0009_sync/poses.txt +0 -0
  47. datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0010_sync/poses.txt +0 -0
  48. datasets/kitti_360/preprocess_kitti_360.py +81 -0
  49. datasets/kitti_360/splits/seg/test_files.txt +446 -0
  50. datasets/kitti_360/splits/seg/train_files.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,213 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+# in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. However, if you prefer,
+# you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+# refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Gradio
+.gradio
+
+# outputs and checkpoints
+out/
LICENSE.txt ADDED
@@ -0,0 +1,201 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction,
+and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by
+the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all
+other entities that control, are controlled by, or are under common
+control with that entity. For the purposes of this definition,
+"control" means (i) the power, direct or indirect, to cause the
+direction or management of such entity, whether by contract or
+otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity
+exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation
+source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical
+transformation or translation of a Source form, including but
+not limited to compiled object code, generated documentation,
+and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or
+Object form, made available under the License, as indicated by a
+copyright notice that is included in or attached to the work
+(an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object
+form, that is based on (or derived from) the Work and for which the
+editorial revisions, annotations, elaborations, or other modifications
+represent, as a whole, an original work of authorship. For the purposes
+of this License, Derivative Works shall not include works that remain
+separable from, or merely link (or bind by name) to the interfaces of,
+the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including
+the original version of the Work and any modifications or additions
+to that Work or Derivative Works thereof, that is intentionally
+submitted to Licensor for inclusion in the Work by the copyright owner
+or by an individual or Legal Entity authorized to submit on behalf of
+the copyright owner. For the purposes of this definition, "submitted"
+means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems,
+and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but
+excluding communication that is conspicuously marked or otherwise
+designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity
+on behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the
+Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+(except as stated in this section) patent license to make, have made,
+use, offer to sell, sell, import, and otherwise transfer the Work,
+where such license applies only to those patent claims licensable
+by such Contributor that are necessarily infringed by their
+Contribution(s) alone or by combination of their Contribution(s)
+with the Work to which such Contribution(s) was submitted. If You
+institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work
+or a Contribution incorporated within the Work constitutes direct
+or contributory patent infringement, then any patent licenses
+granted to You under this License for that Work shall terminate
+as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+Work or Derivative Works thereof in any medium, with or without
+modifications, and in Source or Object form, provided that You
+meet the following conditions:
+
+(a) You must give any other recipients of the Work or
+Derivative Works a copy of this License; and
+
+(b) You must cause any modified files to carry prominent notices
+stating that You changed the files; and
+
+(c) You must retain, in the Source form of any Derivative Works
+that You distribute, all copyright, patent, trademark, and
+attribution notices from the Source form of the Work,
+excluding those notices that do not pertain to any part of
+the Derivative Works; and
+
+(d) If the Work includes a "NOTICE" text file as part of its
+distribution, then any Derivative Works that You distribute must
+include a readable copy of the attribution notices contained
+within such NOTICE file, excluding those notices that do not
+pertain to any part of the Derivative Works, in at least one
+of the following places: within a NOTICE text file distributed
+as part of the Derivative Works; within the Source form or
+documentation, if provided along with the Derivative Works; or,
+within a display generated by the Derivative Works, if and
+wherever such third-party notices normally appear. The contents
+of the NOTICE file are for informational purposes only and
+do not modify the License. You may add Your own attribution
+notices within Derivative Works that You distribute, alongside
+or as an addendum to the NOTICE text from the Work, provided
+that such additional attribution notices cannot be construed
+as modifying the License.
+
+You may add Your own copyright statement to Your modifications and
+may provide additional or different license terms and conditions
+for use, reproduction, or distribution of Your modifications, or
+for any such Derivative Works as a whole, provided Your use,
+reproduction, and distribution of the Work otherwise complies with
+the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+any Contribution intentionally submitted for inclusion in the Work
+by You to the Licensor shall be under the terms and conditions of
+this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify
+the terms of any separate license agreement you may have executed
+with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+names, trademarks, service marks, or product names of the Licensor,
+except as required for reasonable and customary use in describing the
+origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+agreed to in writing, Licensor provides the Work (and each
+Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied, including, without limitation, any warranties or conditions
+of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any
+risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+whether in tort (including negligence), contract, or otherwise,
+unless required by applicable law (such as deliberate and grossly
+negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special,
+incidental, or consequential damages of any character arising as a
+result of this License or out of the use or inability to use the
+Work (including but not limited to damages for loss of goodwill,
+work stoppage, computer failure or malfunction, or any and all
+other commercial damages or losses), even if such Contributor
+has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+the Work or Derivative Works thereof, You may choose to offer,
+and charge a fee for, acceptance of support, warranty, indemnity,
+or other liability obligations and/or rights consistent with this
+License. However, in accepting such obligations, You may act only
+on Your own behalf and on Your sole responsibility, not on behalf
+of any other Contributor, and only if You agree to indemnify,
+defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason
+of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+To apply the Apache License to your work, attach the following
+boilerplate notice, with the fields enclosed by brackets "[]"
+replaced with your own identifying information. (Don't include
+the brackets!) The text should be enclosed in the appropriate
+comment syntax for the file format. We also recommend that a
+file or class name and description of purpose be included on the
+same "printed page" as the copyright notice for easier
+identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
README.md CHANGED
@@ -1,9 +1,10 @@
 ---
 title: SceneDINO
-emoji: 📈
+emoji: 🦕
 colorFrom: blue
 colorTo: pink
 sdk: gradio
+python_version: 3.10
 sdk_version: 5.35.0
 app_file: app.py
 pinned: false
app.py ADDED
@@ -0,0 +1,213 @@
+from demo_utils.utils import (load_modules,
+                              load_sample_from_path,
+                              load_sample_from_dataset,
+                              get_fov_mask,
+                              inference_3d,
+                              inference_rendered_2d)
+
+import tempfile
+import os
+import sys
+import yaml
+
+sys.path.append("./sscbench")
+from sscbench.gen_voxelgrid_npy import save_as_voxel_ply, classes_to_colors
+from download_checkpoint_hf import download_scenedino_checkpoint
+
+import torch
+
+import numpy as np
+import gradio as gr
+import open3d as o3d
+import spaces
+
+
+# Load checkpoints from Hugging Face
+download_scenedino_checkpoint("ssc-kitti-360-dino")
+download_scenedino_checkpoint("ssc-kitti-360-dinov2")
+
+# Load model, ray sampler, datasets
+ckpt_path = "out/scenedino-pretrained/seg-best-dino/"
+ckpt_name = "checkpoint.pt"
+net_v1, renderer_v1, ray_sampler_v1, test_dataset = load_modules(ckpt_path, ckpt_name)
+renderer_v1.eval()
+
+ckpt_path = "out/scenedino-pretrained/seg-best-dinov2/"
+ckpt_name = "checkpoint.pt"
+net_v2, renderer_v2, ray_sampler_v2, _ = load_modules(ckpt_path, ckpt_name)
+renderer_v2.eval()
+
+
+def convert_voxels(arr, map_dict):
+    f = np.vectorize(map_dict.__getitem__)
+    return f(arr)
+
+with open("sscbench/label_maps.yaml", "r") as f:
+    label_maps = yaml.safe_load(f)
+
+
+@spaces.GPU(duration=60)
+def demo_run(image: str,
+             backbone: str,
+             mode: str,
+             sigma_threshold: float,
+             resolution: float,
+             x_range: int,
+             y_range: int,
+             z_range: int):
+
+    if backbone == "DINO (ViT-B)":
+        net, renderer, ray_sampler = net_v1, renderer_v1, ray_sampler_v1
+    elif backbone == "DINOv2 (ViT-B)":
+        net, renderer, ray_sampler = net_v2, renderer_v2, ray_sampler_v2
+
+    prediction_mode = "stego_kmeans"
+    if mode == "Feature PCA 1-3":
+        segmentation = False
+        rgb_from_pca_dim = 0
+    elif mode == "Feature PCA 4-6":
+        segmentation = False
+        rgb_from_pca_dim = 3
+    elif mode == "Feature PCA 7-9":
+        segmentation = False
+        rgb_from_pca_dim = 6
+    elif mode == "SSC (unsup.)":
+        segmentation = True
+    elif mode == "SSC (linear)":
+        segmentation = True
+        prediction_mode = "direct_linear"
+
+    # Necessary when reading from examples? cast from str
+    sigma_threshold, resolution = float(sigma_threshold), float(resolution)
+    x_range, y_range, z_range = int(x_range), int(y_range), int(z_range)
+
+    # Too many voxels
+    max_voxel_count = 5000000
+    voxel_count = (x_range//resolution + 1) * (y_range//resolution + 1) * (z_range//resolution + 1)
+    if voxel_count > max_voxel_count:
+        raise gr.Error(f"Too many voxels ({int(voxel_count) / 1_000_000:.1f}M > {max_voxel_count / 1_000_000:.1f}M).\n" +
+                       "Reduce voxel resolution or range.", duration=5)
+
+    with torch.no_grad():
+        images, poses, projs = load_sample_from_path(image, intrinsic=None)
+
+        net.encode(images, projs, poses, ids_encoder=[0])
+        net.set_scale(0)
+
+        # 2D Features output
+        dino_full_2d, depth_2d, seg_2d = inference_rendered_2d(net, poses, projs, ray_sampler, renderer, prediction_mode)
+        net.encoder.fit_visualization(dino_full_2d.flatten(0, -2))
+
+        if segmentation:
+            output_2d = convert_voxels(seg_2d.detach().cpu(), label_maps["cityscapes_to_label"])
+            output_2d = classes_to_colors[output_2d].cpu().detach().numpy()
+        else:
+            output_2d = net.encoder.transform_visualization(dino_full_2d, from_dim=rgb_from_pca_dim)
+            output_2d -= output_2d.min()
+            output_2d /= output_2d.max()
+            output_2d = output_2d.cpu().detach().numpy()
+
+        # Chunking
+        max_chunk_size = 100000
+        z_layers_per_chunk = max_chunk_size // ((x_range//resolution + 1) * (y_range//resolution + 1))
+
+        # 3D Features output
+        x_range = (-x_range/2, x_range)
+        y_range = (-y_range/2, y_range)
+        z_range = (0, z_range)
+
+        is_occupied, output_3d, fov_mask = [], [], []
+        current_z = 0
+
+        while current_z <= z_range[1]:
+            z_range_chunk = (current_z, min(current_z + z_layers_per_chunk*resolution, z_range[1]))
+            current_z += (z_layers_per_chunk+1) * resolution
+
+            xyz_chunk, dino_full_3d_chunk, sigma_3d_chunk, seg_3d_chunk = inference_3d(net, x_range, y_range, z_range_chunk, resolution, prediction_mode)
+            fov_mask_chunk = get_fov_mask(projs[0, 0], xyz_chunk)
+
+            is_occupied_chunk = sigma_3d_chunk > sigma_threshold
+
+            if segmentation:
+                output_3d_chunk = seg_3d_chunk
+            else:
+                output_3d_chunk = net.encoder.transform_visualization(dino_full_3d_chunk, from_dim=rgb_from_pca_dim)
+                output_3d_chunk -= output_3d_chunk.min()
+                output_3d_chunk /= output_3d_chunk.max()
+
+                output_3d_chunk = torch.clamp(output_3d_chunk*1.2 - 0.1, 0.0, 1.0)
+                output_3d_chunk = (255*output_3d_chunk).int()
+
+            fov_mask_chunk = fov_mask_chunk.reshape(is_occupied_chunk.shape)
+
+            is_occupied.append(is_occupied_chunk)
+            output_3d.append(output_3d_chunk)
+            fov_mask.append(fov_mask_chunk)
+
+        is_occupied = torch.cat(is_occupied, dim=2)
+        output_3d = torch.cat(output_3d, dim=2)
+        fov_mask = torch.cat(fov_mask, dim=2)
+
+    temp_dir = tempfile.gettempdir()
+    ply_path = os.path.join(temp_dir, "output.ply")
+
+    if segmentation:
+        # mapped to "unlabeled"
+        is_occupied[output_3d == 10] = 0
+        is_occupied[output_3d == 12] = 0
+
+        save_as_voxel_ply(ply_path,
+                          is_occupied.detach().cpu(),
+                          voxel_size=resolution,
+                          size=is_occupied.size(),
+                          classes=torch.Tensor(
+                              convert_voxels(
+                                  output_3d.detach().cpu(),
+                                  label_maps["cityscapes_to_label"])),
+                          fov_mask=fov_mask)
+    else:
+        save_as_voxel_ply(ply_path,
+                          is_occupied.detach().cpu(),
+                          voxel_size=resolution,
+                          size=is_occupied.size(),
+                          colors=output_3d.detach().cpu(),
+                          fov_mask=fov_mask)
+
+    mesh = o3d.io.read_triangle_mesh(ply_path)
+    glb_path = os.path.join(temp_dir, "output.glb")
+    o3d.io.write_triangle_mesh(glb_path, mesh, write_ascii=True)
+
+    del dino_full_2d, depth_2d, seg_2d
+    del dino_full_3d_chunk, sigma_3d_chunk, seg_3d_chunk, is_occupied_chunk
+    del is_occupied, output_3d, fov_mask
+
+    torch.cuda.empty_cache()
+
+    return output_2d, glb_path
+
+
+demo = gr.Interface(
+    demo_run,
+    inputs=[
+        gr.Image(label="Input image", type="filepath"),
+        gr.Radio(label="Backbone", choices=["DINO (ViT-B)", "DINOv2 (ViT-B)"]),
+        gr.Radio(label="Mode", choices=["Feature PCA 1-3", "Feature PCA 4-6", "Feature PCA 7-9", "SSC (unsup.)", "SSC (linear)"]),
+        gr.Slider(label="Density threshold", minimum=0, maximum=1, step=0.05, value=0.2),
+        gr.Slider(label="Resolution [m]", minimum=0.05, maximum=0.5, step=0.1, value=0.2),
+        gr.Slider(label="X Range [m]", minimum=1, maximum=50, step=1, value=10),
+        gr.Slider(label="Y Range [m]", minimum=1, maximum=50, step=1, value=10),
+        gr.Slider(label="Z Range [m]", minimum=1, maximum=100, step=1, value=20),
+    ],
+    outputs=[
+        gr.Image(label="Rendered 2D Visualization"),
+        gr.Model3D(label="Voxel Surface 3D Visualization",
+                   zoom_speed=0.5, pan_speed=0.5,
+                   clear_color=[0.0, 0.0, 0.0, 0.0],
+                   camera_position=[-90, 80, None],
+                   display_mode="solid"),
+    ],
+    title="SceneDINO Demo",
+    examples="demo_utils/examples",
+)

+demo.launch()
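
A note on `convert_voxels` above: it wraps a plain dict lookup with `np.vectorize` so the class-id remapping broadcasts over a whole voxel grid at once. A minimal self-contained sketch of the same pattern (the mapping values here are made up for illustration; in app.py the real mapping comes from `sscbench/label_maps.yaml`):

```python
import numpy as np

# Hypothetical id mapping for illustration only.
map_dict = {0: 7, 1: 8, 2: 11}

f = np.vectorize(map_dict.__getitem__)  # dict lookup applied elementwise
arr = np.array([[0, 1], [2, 0]])
print(f(arr))  # [[ 7  8]
               #  [11  7]]
```

Every value in the input array must be a key of the mapping, otherwise the lookup raises a KeyError.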
configs/dataset/bdd_seg.yaml ADDED
@@ -0,0 +1,2 @@
+type: "BDD_seg"
+data_path: "<PATH-BDD>"
configs/dataset/cityscapes_seg.yaml ADDED
@@ -0,0 +1,2 @@
+type: "Cityscapes_seg"
+data_path: "<PATH-CITYSCAPES>"
configs/dataset/kitti_360_sscbench.yaml ADDED
@@ -0,0 +1,15 @@
+type: "old_KITTI_360"
+data_path: "<PATH-KITTI-360>"
+pose_path: "<PATH-KITTI-360-DATA-POSES>"
+split_path: "datasets/kitti_360/splits/sscbench"
+image_size: [ 192, 640 ]
+data_stereo: true
+data_fisheye: true
+data_fc: 2
+# dilation: 10
+# color_aug: true
+fisheye_offset: [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
+stereo_offset: [0]
+is_preprocessed: true
+fisheye_rotation: -15
+data_segmentation: true
configs/dataset/realestate10k.yaml ADDED
@@ -0,0 +1,3 @@
+type: "RealEstate10K"
+data_path: "<PATH-REALESTATE-PKL>.pickle"
+image_size: [288, 512]
configs/downstream/semantic.yaml ADDED
@@ -0,0 +1,13 @@
+type: "segmentation"
+
+n_classes: 19
+gt_classes: 19
+input_dim: 384
+code_dim: 64
+
+knn_neighbors: 4
+buffer_size: 256
+patch_sample_size: 576
+
+mode: "3d"
+apply_crf: False
configs/evaluate_semantic_bdd.yaml ADDED
@@ -0,0 +1,50 @@
+defaults:
+  - dataset: bdd_seg
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: semantic
+  - validation: semantic
+  - downstream: semantic
+  - _self_
+
+training_type: "downstream_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: false
+name: "training"
+batch_size: 1
+gradient_accum_factor: 1
+num_workers: 6
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+# eval_visualize: [0, 1, 2, 3]
+
+output:
+  path: "out/evaluation-paper"
+  unique_id: evaluation-bdd
+
+checkpoint: "<PATH-FEATURE-CHECKPOINT>.pt"
+
+evaluations:
+  - type: seg
+    agg_type: unsup_seg
+    args:
+      n_classes: 19
+      gt_classes: 19
+
+downstream:
+  input_dim: 768
configs/evaluate_semantic_cityscapes.yaml ADDED
@@ -0,0 +1,50 @@
+defaults:
+  - dataset: cityscapes_seg
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: semantic
+  - validation: semantic
+  - downstream: semantic
+  - _self_
+
+training_type: "downstream_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: false
+name: "training"
+batch_size: 1
+gradient_accum_factor: 1
+num_workers: 6
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+# eval_visualize: [0, 1, 2, 3]
+
+output:
+  path: "out/evaluation-paper"
+  unique_id: evaluation-cityscapes
+
+checkpoint: "<PATH-FEATURE-CHECKPOINT>.pt"
+
+evaluations:
+  - type: seg
+    agg_type: unsup_seg
+    args:
+      n_classes: 19
+      gt_classes: 19
+
+downstream:
+  input_dim: 768
configs/evaluate_semantic_kitti_360.yaml ADDED
@@ -0,0 +1,50 @@
+defaults:
+  - dataset: kitti_360_sscbench
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: semantic
+  - validation: semantic
+  - downstream: semantic
+  - _self_
+
+training_type: "downstream_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: false
+name: "training"
+batch_size: 1
+gradient_accum_factor: 1
+num_workers: 6
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+# eval_visualize: [0, 1, 2, 3]
+
+output:
+  path: "out/evaluation-paper"
+  unique_id: evaluation-kitti-360-sscbench
+
+checkpoint: "<PATH-FEATURE-CHECKPOINT>.pt"
+
+evaluations:
+  - type: seg
+    agg_type: unsup_seg
+    args:
+      n_classes: 19
+      gt_classes: 19
+
+downstream:
+  input_dim: 768
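
The three evaluation configs above differ only in the dataset group and `output.unique_id`; each also overrides `downstream.input_dim` from the 384 set in configs/downstream/semantic.yaml to 768. Assuming Hydra/OmegaConf composition (which the `defaults` lists suggest), later values win the merge; a minimal OmegaConf sketch of that behavior:

```python
from omegaconf import OmegaConf

base = OmegaConf.create({"downstream": {"input_dim": 384, "mode": "3d"}})
override = OmegaConf.create({"downstream": {"input_dim": 768}})

merged = OmegaConf.merge(base, override)
print(merged.downstream.input_dim)  # 768 -- the override wins
print(merged.downstream.mode)       # 3d  -- untouched keys survive
```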
configs/model/dino_downsampler.yaml ADDED
@@ -0,0 +1,64 @@
+arch: "BTSNet"
+use_code: true
+prediction_mode: default
+
+predict_dino: true
+dino_dims: 64  # == encoder.pca_dino_out
+
+compensate_artifacts: true
+flip_augmentation: true
+
+encoder:
+  type: "dinov2"
+  mode: "downsample-prediction"  # upsample-gt, downsample-pred
+  decoder_arch: "dpt"
+  # upsampler_arch: "multiscale-crop"  # multiscale-crop, nearest
+  downsampler_arch: "featup"  # featup, bilinear
+  encoder_arch: "vit-b"  # vit-s, vit-b
+  version: "v1"  # v1, v2, reg, fit3d
+  separate_gt_version: "v1"  # v1, v2, reg, fit3d, None
+  encoder_freeze: false
+  flip_avg_gt: false
+  dim_reduction_arch: "mlp"
+  num_ch_enc: [64, 64, 128, 256]
+  intermediate_features: [3, 6, 9]
+  decoder_out_dim: 256
+  dino_pca_dim: 64  # == dino_dims
+  image_size: [192, 640]
+  key_features: false
+
+code:
+  num_freqs: 6
+  freq_factor: 1.5
+  include_input: true
+
+decoder_heads:
+  - type: "resnet"
+    name: "normal_head"
+    freeze: false
+    args:
+      n_blocks: 0
+      d_hidden: 128
+final_prediction_head: "normal_head"
+
+encoding_strategy:
+  name: "default"
+  args: {}
+eval_encoding_strategy:
+  name: "default"
+  args: null
+loss_renderer_strategy:
+  name: "kitti_360"
+  args: null
+eval_loss_renderer_strategy:
+  name: "single_renderer"
+  args:
+    shuffle_frames: false
+    all_frames: true
+
+inv_z: true
+
+learn_empty: false
+code_mode: z
+
+n_frames_render: 4  # number of frames to render among v==8
configs/model/dino_upsampler.yaml ADDED
@@ -0,0 +1,64 @@
+arch: "BTSNet"
+use_code: true
+prediction_mode: default
+
+predict_dino: true
+dino_dims: 64  # == encoder.pca_dino_out
+
+compensate_artifacts: false
+flip_augmentation: true
+
+encoder:
+  type: "dinov2"
+  mode: "upsample-gt"  # upsample-gt, downsample-pred
+  decoder_arch: "dpt"
+  upsampler_arch: "multiscale-crop"  # multiscale-crop, nearest
+  # downsampler_arch: "featup"  # featup, bilinear
+  encoder_arch: "vit-b"  # vit-s, vit-b
+  version: "v1"  # v1, v2, reg, fit3d
+  separate_gt_version: "v1"  # v1, v2, reg, fit3d, None
+  encoder_freeze: false
+  flip_avg_gt: false
+  dim_reduction_arch: "mlp"
+  num_ch_enc: [64, 64, 128, 256]
+  intermediate_features: [3, 6, 9]
+  decoder_out_dim: 256
+  dino_pca_dim: 64  # == dino_dims
+  image_size: [192, 640]
+  key_features: false
+
+code:
+  num_freqs: 6
+  freq_factor: 1.5
+  include_input: true
+
+decoder_heads:
+  - type: "resnet"
+    name: "normal_head"
+    freeze: false
+    args:
+      n_blocks: 0
+      d_hidden: 128
+final_prediction_head: "normal_head"
+
+encoding_strategy:
+  name: "default"
+  args: {}
+eval_encoding_strategy:
+  name: "default"
+  args: null
+loss_renderer_strategy:
+  name: "kitti_360"
+  args: null
+eval_loss_renderer_strategy:
+  name: "single_renderer"
+  args:
+    shuffle_frames: false
+    all_frames: true
+
+inv_z: true
+
+learn_empty: false
+code_mode: z
+
+n_frames_render: 4  # number of frames to render among v==8
configs/model/dinov2_downsampler.yaml ADDED
@@ -0,0 +1,64 @@
+arch: "BTSNet"
+use_code: true
+prediction_mode: default
+
+predict_dino: true
+dino_dims: 64  # == encoder.pca_dino_out
+
+compensate_artifacts: true
+flip_augmentation: true
+
+encoder:
+  type: "dinov2"
+  mode: "downsample-prediction"  # upsample-gt, downsample-pred
+  decoder_arch: "dpt"
+  # upsampler_arch: "multiscale-crop"  # multiscale-crop, nearest
+  downsampler_arch: "featup"  # featup, bilinear
+  encoder_arch: "vit-b"  # vit-s, vit-b
+  version: "v2"  # v1, v2, reg, fit3d
+  separate_gt_version: "v2"  # v1, v2, reg, fit3d, None
+  encoder_freeze: false
+  flip_avg_gt: false
+  dim_reduction_arch: "mlp"
+  num_ch_enc: [64, 64, 128, 256]
+  intermediate_features: [3, 6, 9]
+  decoder_out_dim: 256
+  dino_pca_dim: 64  # == dino_dims
+  image_size: [192, 640]
+  key_features: false
+
+code:
+  num_freqs: 6
+  freq_factor: 1.5
+  include_input: true
+
+decoder_heads:
+  - type: "resnet"
+    name: "normal_head"
+    freeze: false
+    args:
+      n_blocks: 0
+      d_hidden: 128
+final_prediction_head: "normal_head"
+
+encoding_strategy:
+  name: "default"
+  args: {}
+eval_encoding_strategy:
+  name: "default"
+  args: null
+loss_renderer_strategy:
+  name: "kitti_360"
+  args: null
+eval_loss_renderer_strategy:
+  name: "single_renderer"
+  args:
+    shuffle_frames: false
+    all_frames: true
+
+inv_z: true
+
+learn_empty: false
+code_mode: z
+
+n_frames_render: 4  # number of frames to render among v==8
configs/renderer/pixelnerf.yaml ADDED
@@ -0,0 +1,9 @@
+n_coarse : 32
+n_fine : 0
+n_fine_depth : 0
+depth_std : 1.0
+sched : []
+white_bkgd : false
+lindisp: true
+hard_alpha_cap: true
+eval_batch_size: 65536
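
These renderer settings parameterize NeRF-style volume rendering: `n_coarse: 32` samples per ray, `lindisp: true` to space samples linearly in disparity, and `hard_alpha_cap: true` to force the final sample opaque so each ray's weights sum to one. A minimal sketch of the compositing step such settings feed into (an illustration of the standard technique, not the repository's renderer):

```python
import torch

def composite_weights(sigma, z, hard_alpha_cap=True):
    """sigma, z: (n_rays, n_samples) densities and sample depths."""
    deltas = z[:, 1:] - z[:, :-1]
    deltas = torch.cat([deltas, 1e10 * torch.ones_like(deltas[:, :1])], dim=-1)
    alpha = 1.0 - torch.exp(-sigma * deltas)  # per-sample opacity
    if hard_alpha_cap:
        alpha = alpha.clone()
        alpha[:, -1] = 1.0                    # last sample absorbs the remainder
    # transmittance: probability the ray reaches each sample unoccluded
    trans = torch.cumprod(
        torch.cat([torch.ones_like(alpha[:, :1]), 1.0 - alpha[:, :-1]], dim=-1),
        dim=-1)
    return alpha * trans                      # rendering weights per sample
```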
configs/train_scenedino_kitti_360.yaml ADDED
@@ -0,0 +1,43 @@
+defaults:
+  - dataset: kitti_360_sscbench
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: scenedino
+  - validation: scenedino
+  - _self_
+
+training_type: "full_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: true
+name: "training"
+batch_size: 4
+num_workers: 4
+
+output:
+  path: "out/features-paper"
+  unique_id: scenedino-kitti-360-sscbench
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+training:
+  ray_sampler:
+    args:
+      patch_size: 8
+      # ray_batch_size: 512
+
+  scheduler:
+    step_size: 50000
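
The `defaults` list follows Hydra's composition syntax: the dataset, model, renderer, training, and validation groups are merged first, and `_self_` applies this file's overrides last (for example, `training.ray_sampler.args.patch_size` drops from the 16 in configs/training/scenedino.yaml to 8). Assuming the training entry point composes configs with Hydra (the entry point itself is not part of this diff), the result can be inspected like this:

```python
from hydra import compose, initialize

# Assumption: configs/ is the Hydra search path used by the training script.
with initialize(version_base=None, config_path="configs"):
    cfg = compose(config_name="train_scenedino_kitti_360")

print(cfg.renderer.n_coarse)                     # 32
print(cfg.training.ray_sampler.args.patch_size)  # 8 (this file's override)
```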
configs/train_scenedino_re10k.yaml ADDED
@@ -0,0 +1,49 @@
+defaults:
+  - dataset: realestate10k
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: scenedino
+  - validation: scenedino
+  - _self_
+
+training_type: "full_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: true
+name: "training"
+batch_size: 4
+num_workers: 4
+
+output:
+  path: "out/features-paper"
+  unique_id: scenedino-re10k
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+model:
+  encoder:
+    image_size: [288, 512]
+  loss_renderer_strategy:
+    name: "alternate"
+
+training:
+  ray_sampler:
+    args:
+      patch_size: 8
+      # ray_batch_size: 512
+
+  scheduler:
+    step_size: 50000
configs/train_semantic_kitti_360.yaml ADDED
@@ -0,0 +1,52 @@
+defaults:
+  - dataset: kitti_360_sscbench
+  - model: dino_downsampler
+  - renderer: pixelnerf
+  - training: semantic
+  - validation: semantic
+  - downstream: semantic
+  - _self_
+
+training_type: "downstream_training"
+mode: "nvs"
+seed: 0
+backend: null
+nproc_per_node: null
+with_amp: true
+name: "training"
+batch_size: 4
+gradient_accum_factor: 1
+num_workers: 6
+
+renderer:
+  n_coarse : 32
+  n_fine : 0
+  n_fine_depth : 0
+  depth_std : 1.0
+  sched : []
+  white_bkgd : false
+  lindisp: true
+  hard_alpha_cap: true
+  render_mode: volumetric
+  eval_batch_size: 65536
+  normalize_dino: true
+
+output:
+  path: "out/ssc-paper"
+  unique_id: ssc-kitti-360-sscbench
+
+training:
+  epoch_length: 1000
+  resume_from: "<PATH-FEATURE-CHECKPOINT>.pt"
+
+  optimizer:
+    args:
+      lr: 5e-4
+
+model:
+  sample_radius_3d: 0.5
+
+downstream:
+  input_dim: 768
+  mode: "3d"
+  # mlp_head: true
configs/training/loss/scenedino.yaml ADDED
@@ -0,0 +1,15 @@
+- type: reconstruction
+  coarse:
+    criterion: "l1+ssim"
+    dino_criterion: "cosine"
+  invalid_policy: weight_guided
+
+  reconstruct_dino: true
+  lambda_dino_coarse: 0.2
+  temperature_dino: 5
+
+  regularizations:
+    - type: edge_aware_smoothness
+      lambda: 0.001
+    - type: dino_edge_aware_smoothness
+      lambda: 0.25
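
Here `dino_criterion: "cosine"` compares rendered DINO features against the 2D teacher features by cosine similarity, weighted by `lambda_dino_coarse`. A simplified sketch of such a term (not the repository's exact loss, which also handles the invalid policy and temperature):

```python
import torch.nn.functional as F

def dino_cosine_loss(pred, target, lambda_dino_coarse=0.2):
    # pred, target: (..., dino_dims) rendered vs. teacher features
    cos_sim = F.cosine_similarity(pred, target, dim=-1)
    return lambda_dino_coarse * (1.0 - cos_sim).mean()
```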
configs/training/loss/semantic.yaml ADDED
@@ -0,0 +1,10 @@
+- type: stego
+
+  random_weight: 0.6702352279261414
+  knn_weight: 0.4156436438453117
+  self_weight: 0.08146997886146659
+  random_shift: 0.8709334888837256
+  knn_shift: 0.18458300726748128
+  self_shift: 0.43610463774158115
+
+  pointwise: false
configs/training/optimizer/scenedino.yaml ADDED
@@ -0,0 +1,7 @@
+type: "adam"
+args:
+  lr: 1e-4
+  betas: [0.9, 0.999]
+  eps: 1e-08
+  weight_decay: 0.0
+  amsgrad: false
configs/training/optimizer/semantic.yaml ADDED
@@ -0,0 +1,7 @@
+type: "adam"
+args:
+  lr: 5e-4
+  betas: [0.9, 0.999]
+  eps: 1e-08
+  weight_decay: 0.0
+  amsgrad: false
configs/training/scenedino.yaml ADDED
@@ -0,0 +1,21 @@
+defaults:
+  - optimizer: scenedino
+  - scheduler: scenedino
+  - loss: scenedino
+  - _self_
+
+num_epochs: 50
+continue: false
+
+checkpoint_every: 10000
+log_every_iters: 100
+
+ray_sampler:
+  z_near: 3
+  z_far: 80
+  sample_mode: "patch"
+  args:
+    patch_size: 16
+    ray_batch_size: 2048
+    snap_to_grid: true
+    dino_upscaled: false
configs/training/scheduler/scenedino.yaml ADDED
@@ -0,0 +1,3 @@
+type: step
+step_size: 100000
+gamma: 0.1
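
`type: step` with these arguments maps naturally onto PyTorch's `StepLR` (an assumption about the scheduler dispatch, not confirmed by this diff): the learning rate is multiplied by `gamma` every `step_size` steps.

```python
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=1e-4)  # lr from configs/training/optimizer/scenedino.yaml
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100_000, gamma=0.1)
```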
configs/training/semantic.yaml ADDED
@@ -0,0 +1,16 @@
+defaults:
+  - optimizer: semantic
+  - loss: semantic
+  - _self_
+
+num_epochs: 1
+epoch_length: 2500
+continue: false
+
+checkpoint_every: 5000
+log_every_iters: 250
+
+ray_sampler:
+  z_near: 3
+  z_far: 80
+  sample_mode: "image"
configs/validation/scenedino.yaml ADDED
@@ -0,0 +1,78 @@
+validation:
+  metrics:
+    - type: depth
+      args: null
+    - type: dino
+      args: null
+  subset:
+    type: range
+    args:
+      start: 0
+      end: 128
+  save_best:
+    metric: dino_cos_sim
+    sign: 1
+  log_loss: false
+  global_step:
+    type: "trainer iteration"
+  events:
+    # - type: STARTED
+    #   args: null
+    - type: ITERATION_COMPLETED
+      args:
+        every: 5000
+    # - type: EPOCH_COMPLETED
+    #   args:
+    #     every: 1
+    - type: COMPLETED
+      args: null
+
+visualization:
+  metrics:
+    - type: depth
+      args: null
+  subset:
+    type: range
+    args:
+      start: 200
+      end: 201
+  visualize:
+    input_imgs: null
+    reconstructed_imgs: null
+    reconstruction_rmse: null
+    dino_gt: null
+    reconstructed_dino: null
+    reconstructed_dino_downsampled: null
+    batch_dino_gt: null
+    batch_dino_artifacts: null
+    batch_dino_features_kmeans: null
+    batch_dino_gt_kmeans: null
+    batch_reconstructed_dino: null
+    batch_reconstructed_dino_downsampled: null
+    dino_downsampling_salience: null
+    dino_downsampling_weight: null
+    dino_downsampling_per_patch_weight: null
+    dino_cos_sim_downsampled: null
+    depth: null
+    depth_profile: null
+    alpha_sum: null
+    ray_entropy: null
+    ray_entropy_weights: null
+    invalids: null
+    rendered_flow: null
+    predicted_occlusions: null
+    uncertainty: null
+  log_loss: false
+  global_step:
+    type: "trainer iteration"
+  events:
+    - type: STARTED
+      args: null
+    - type: ITERATION_COMPLETED
+      args:
+        every: 5000
+    # - type: EPOCH_COMPLETED
+    #   args:
+    #     every: 1
+    - type: COMPLETED
+      args: null
configs/validation/semantic.yaml ADDED
@@ -0,0 +1,80 @@
+validation:
+  metrics:
+    - type: seg
+      agg_type: unsup_seg
+      args:
+        n_classes: 19
+        gt_classes: 19
+    - type: stego
+      agg_type: concat
+  subset:
+    type: random
+    args:
+      size: 32
+  save_best:
+    metric: "stego_cluster_weighted_miou"
+    update_model: true
+  dry_run: false
+  log_loss: false
+  global_step:
+    type: "trainer iteration"
+  events:
+    # - type: STARTED
+    #   args: null
+    - type: ITERATION_COMPLETED
+      args:
+        every: 100
+    # - type: EPOCH_COMPLETED
+    #   args:
+    #     every: 1
+    # - type: COMPLETED
+    #   args: null
+
+visualization_seg:
+  metrics: {}
+  subset:
+    type: range
+    args:
+      start: 300
+      end: 301
+  visualize:
+    input_imgs: null
+    # reconstructed_imgs: null
+    # reconstruction_rmse: null
+    dino_gt: null
+    # reconstructed_dino: null
+    # reconstructed_dino_downsampled: null
+    batch_dino_gt: null
+    batch_dino_artifacts: null
+    segs_gt: null
+    segs_pred: null
+    batch_reconstructed_dino: null
+    batch_dino_features_kmeans: null
+    # batch_reconstructed_dino_downsampled: null
+    # dino_downsampling_salience: null
+    # dino_downsampling_weight: null
+    # dino_downsampling_per_patch_weight: null
+    # dino_cos_sim_downsampled: null
+    depth: null
+    # depth_profile: null
+    # alpha_sum: null
+    # ray_entropy: null
+    # ray_entropy_weights: null
+    # invalids: null
+    # rendered_flow: null
+    # predicted_occlusions: null
+    # uncertainty: null
+  log_loss: false
+  global_step:
+    type: "trainer iteration"
+  events:
+    - type: STARTED
+      args: null
+    - type: ITERATION_COMPLETED
+      args:
+        every: 100
+    # - type: EPOCH_COMPLETED
+    #   args:
+    #     every: 1
+    # - type: COMPLETED
+    #   args: null
datasets/__init__.py ADDED
File without changes
datasets/bdd/bdd_dataset.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ import time
4
+ import torch
5
+ import os
6
+
7
+ from PIL import Image
8
+
9
+ from torchvision import transforms
10
+ from torch.utils.data import Dataset
11
+
12
+ from collections import namedtuple
13
+ from datasets.kitti_360.labels import trainId2label
14
+
15
+
16
+ Label = namedtuple(
17
+ "Label",
18
+ [
19
+ "name",
20
+ "id",
21
+ "trainId",
22
+ "category",
23
+ "categoryId",
24
+ "hasInstances",
25
+ "ignoreInEval",
26
+ "color",
27
+ "to_cs27",
28
+ ],
29
+ )
30
+
31
+ BDD_LABEL = [
32
+ Label("unlabeled", 0, 255, "void", 0, False, True, (0, 0, 0), 255),
33
+ Label("dynamic", 1, 255, "void", 0, False, True, (111, 74, 0), 255),
34
+ Label("ego vehicle", 2, 255, "void", 0, False, True, (0, 0, 0), 255),
35
+ Label("ground", 3, 255, "void", 0, False, True, (81, 0, 81), 255),
36
+ Label("static", 4, 255, "void", 0, False, True, (0, 0, 0), 255),
37
+ Label("parking", 5, 255, "flat", 1, False, True, (250, 170, 160), 2),
38
+ Label("rail track", 6, 255, "flat", 1, False, True, (230, 150, 140), 3),
39
+ Label("road", 7, 0, "flat", 1, False, False, (128, 64, 128), 0),
40
+ Label("sidewalk", 8, 1, "flat", 1, False, False, (244, 35, 232), 1),
41
+ Label("bridge", 9, 255, "construction", 2, False, True, (150, 100, 100), 8),
42
+ Label("building", 10, 2, "construction", 2, False, False, (70, 70, 70), 4),
43
+ Label("fence", 11, 4, "construction", 2, False, False, (190, 153, 153), 6),
44
+ Label("garage", 12, 255, "construction", 2, False, True, (180, 100, 180), 255),
45
+ Label("guard rail", 13, 255, "construction", 2, False, True, (180, 165, 180), 7),
46
+ Label("tunnel", 14, 255, "construction", 2, False, True, (150, 120, 90), 9),
47
+ Label("wall", 15, 3, "construction", 2, False, False, (102, 102, 156), 5),
48
+ Label("banner", 16, 255, "object", 3, False, True, (250, 170, 100), 255),
49
+ Label("billboard", 17, 255, "object", 3, False, True, (220, 220, 250), 255),
50
+ Label("lane divider", 18, 255, "object", 3, False, True, (255, 165, 0), 255),
51
+ Label("parking sign", 19, 255, "object", 3, False, False, (220, 20, 60), 255),
52
+ Label("pole", 20, 5, "object", 3, False, False, (153, 153, 153), 10),
53
+ Label("polegroup", 21, 255, "object", 3, False, True, (153, 153, 153), 11),
54
+ Label("street light", 22, 255, "object", 3, False, True, (220, 220, 100), 255),
55
+ Label("traffic cone", 23, 255, "object", 3, False, True, (255, 70, 0), 255),
56
+ Label("traffic device", 24, 255, "object", 3, False, True, (220, 220, 220), 255),
57
+ Label("traffic light", 25, 6, "object", 3, False, False, (250, 170, 30), 12),
58
+ Label("traffic sign", 26, 7, "object", 3, False, False, (220, 220, 0), 13),
59
+ Label("traffic sign frame", 27, 255, "object", 3, False, True, (250, 170, 250), 255),
60
+ Label("terrain", 28, 9, "nature", 4, False, False, (152, 251, 152), 15),
61
+ Label("vegetation", 29, 8, "nature", 4, False, False, (107, 142, 35), 14),
62
+ Label("sky", 30, 10, "sky", 5, False, False, (70, 130, 180), 16),
63
+ Label("person", 31, 11, "human", 6, True, False, (220, 20, 60), 17),
64
+ Label("rider", 32, 12, "human", 6, True, False, (255, 0, 0), 18),
65
+ Label("bicycle", 33, 18, "vehicle", 7, True, False, (119, 11, 32), 26),
66
+ Label("bus", 34, 15, "vehicle", 7, True, False, (0, 60, 100), 21),
67
+ Label("car", 35, 13, "vehicle", 7, True, False, (0, 0, 142), 19),
68
+ Label("caravan", 36, 255, "vehicle", 7, True, True, (0, 0, 90), 22),
69
+ Label("motorcycle", 37, 17, "vehicle", 7, True, False, (0, 0, 230), 25),
70
+ Label("trailer", 38, 255, "vehicle", 7, True, True, (0, 0, 110), 23),
71
+ Label("train", 39, 16, "vehicle", 7, True, False, (0, 80, 100), 24),
72
+ Label("truck", 40, 14, "vehicle", 7, True, False, (0, 0, 70), 20),
73
+ ]
74
+
75
+
76
+ def resize_with_padding(img, target_size, padding_value, interpolation):
77
+ target_h, target_w = target_size
78
+ width, height = img.size
79
+ aspect = width / height
80
+
81
+ if aspect > (target_w / target_h):
82
+ new_w = target_w
83
+ new_h = int(target_w / aspect)
84
+ else:
85
+ new_h = target_h
86
+ new_w = int(target_h * aspect)
87
+
88
+ img = transforms.functional.resize(img, (new_h, new_w), interpolation)
89
+
90
+ pad_h = target_h - new_h
91
+ pad_w = target_w - new_w
92
+ padding = (pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2)
93
+
94
+ return transforms.functional.pad(img, padding, fill=padding_value)
95
+
96
+
97
+ class BDDSeg(Dataset):
98
+ def __init__(self, root, image_set, image_size=(192, 640)):
99
+ super(BDDSeg, self).__init__()
100
+ self.split = image_set
101
+ self.root = root
102
+
103
+ self.image_transform = transforms.Compose([
104
+ #transforms.Lambda(lambda img: resize_with_padding(img, image_size, padding_value=0, interpolation=transforms.InterpolationMode.BILINEAR)),
105
+
106
+ transforms.Resize((320, 640), interpolation=transforms.InterpolationMode.BILINEAR),
107
+ transforms.CenterCrop(image_size),
108
+ transforms.ToTensor(),
109
+ ])
110
+
111
+ self.target_transform = transforms.Compose([
112
+ #transforms.Lambda(lambda img: resize_with_padding(img, image_size, padding_value=-1, interpolation=transforms.InterpolationMode.NEAREST)),
113
+
114
+ transforms.Resize((320, 640), interpolation=transforms.InterpolationMode.NEAREST),
115
+ transforms.CenterCrop(image_size),
116
+ transforms.PILToTensor(),
117
+ transforms.Lambda(lambda x: x.long()),
118
+ ])
119
+
120
+ self.images, self.targets = [], []
121
+
122
+ image_dir = os.path.join(self.root, "images/10k", self.split)
123
+ target_dir = os.path.join(self.root, "labels/pan_seg/bitmasks", self.split)
124
+ for file_name in os.listdir(image_dir):
125
+ image_path = os.path.join(image_dir, file_name)
126
+
127
+ target_filename = os.path.splitext(file_name)[0] + ".png"
128
+ target_path = os.path.join(target_dir, target_filename)
129
+ assert os.path.isfile(target_path)
130
+
131
+ self.images.append(image_path)
132
+ self.targets.append(target_path)
133
+
134
+ self.class_mapping = torch.Tensor([trainId2label[c.trainId].id for c in BDD_LABEL]).int()
135
+
136
+ def __getitem__(self, index):
137
+ _start_time = time.time()
138
+
139
+ image = Image.open(self.images[index]).convert("RGB")
140
+ target = Image.open(self.targets[index])
141
+
142
+ image = self.image_transform(image)
143
+ target = self.target_transform(target)
144
+
145
+ image = 2.0 * image - 1.0
146
+ poses = torch.eye(4) # (4, 4)
147
+ projs = torch.eye(3) # (3, 3)
148
+ target = target[0] # ("instance", "semantic", "polygon", "color")
149
+ target = self.class_mapping[target]
150
+
151
+ _proc_time = time.time() - _start_time
152
+
153
+ data = {
154
+ "imgs": [image.numpy()],
155
+ "poses": [poses.numpy()],
156
+ "projs": [projs.numpy()],
157
+ "segs": [target.numpy()],
158
+ "t__get_item__": np.array([_proc_time]),
159
+ "index": [np.array([index])],
160
+ }
161
+ return data
162
+
163
+ def __len__(self):
164
+ return len(self.images)
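For reference, a minimal sketch (not part of the diff) of what the commented-out `resize_with_padding` path above computes; the input resolution is illustrative:

    from PIL import Image
    from torchvision import transforms

    from datasets.bdd.bdd_dataset import resize_with_padding

    # A 1280x720 frame has aspect 1.78, below the target aspect 640/192 = 3.33,
    # so it is fitted to the target height and padded left/right.
    img = Image.new("RGB", (1280, 720))
    out = resize_with_padding(img, (192, 640), padding_value=0,
                              interpolation=transforms.InterpolationMode.BILINEAR)
    print(out.size)  # (640, 192): resized to 341x192, then padded by 149/150 px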
datasets/cityscapes/cityscapes_dataset.py ADDED
@@ -0,0 +1,82 @@
+ import numpy as np
+ import time
+ import torch
+
+ from torchvision import transforms
+ from torchvision.datasets.cityscapes import Cityscapes
+ from torch.utils.data import Dataset
+
+
+ def resize_with_padding(img, target_size, padding_value, interpolation):
+     target_h, target_w = target_size
+     width, height = img.size
+     aspect = width / height
+
+     if aspect > (target_w / target_h):
+         new_w = target_w
+         new_h = int(target_w / aspect)
+     else:
+         new_h = target_h
+         new_w = int(target_h * aspect)
+
+     img = transforms.functional.resize(img, (new_h, new_w), interpolation)
+
+     pad_h = target_h - new_h
+     pad_w = target_w - new_w
+     padding = (pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2)
+
+     return transforms.functional.pad(img, padding, fill=padding_value)
+
+
+ class CityscapesSeg(Dataset):
+     def __init__(self, root, image_set, image_size=(192, 640)):
+         super(CityscapesSeg, self).__init__()
+         self.split = image_set
+         self.root = root
+
+         transform = transforms.Compose([
+             # transforms.Lambda(lambda img: resize_with_padding(img, image_size, padding_value=0, interpolation=transforms.InterpolationMode.BILINEAR)),
+             transforms.Resize((320, 640), interpolation=transforms.InterpolationMode.BILINEAR),
+             transforms.CenterCrop(image_size),
+             transforms.ToTensor(),
+         ])
+
+         target_transform = transforms.Compose([
+             # transforms.Lambda(lambda img: resize_with_padding(img, image_size, padding_value=-1, interpolation=transforms.InterpolationMode.NEAREST)),
+             transforms.Resize((320, 640), interpolation=transforms.InterpolationMode.NEAREST),
+             transforms.CenterCrop(image_size),
+             transforms.PILToTensor(),
+             transforms.Lambda(lambda x: x.long()),
+         ])
+
+         self.inner_loader = Cityscapes(self.root, image_set,
+                                        mode="fine",
+                                        target_type="semantic",
+                                        transform=transform,
+                                        target_transform=target_transform)
+
+     def __getitem__(self, index):
+         _start_time = time.time()
+         image, target = self.inner_loader[index]  # (3, h, w) / (1, h, w)
+
+         image = 2.0 * image - 1.0
+         poses = torch.eye(4)  # (4, 4)
+         projs = torch.eye(3)  # (3, 3)
+         target = target.squeeze(0)  # (h, w)
+
+         _proc_time = time.time() - _start_time
+
+         data = {
+             "imgs": [image.numpy()],
+             "poses": [poses.numpy()],
+             "projs": [projs.numpy()],
+             "segs": [target.numpy()],
+             "t__get_item__": np.array([_proc_time]),
+             "index": [np.array([index])],
+         }
+         return data
+
+     def __len__(self):
+         return len(self.inner_loader)
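A quick usage sketch (the root path and batch size are placeholders, not values from this repo); with the default collate, each batch entry is a single-element list holding the stacked tensors:

    from torch.utils.data import DataLoader

    from datasets.cityscapes.cityscapes_dataset import CityscapesSeg

    dataset = CityscapesSeg(root="/path/to/cityscapes", image_set="val")
    loader = DataLoader(dataset, batch_size=4, shuffle=False)

    batch = next(iter(loader))
    print(batch["imgs"][0].shape)  # torch.Size([4, 3, 192, 640]), values in [-1, 1]
    print(batch["segs"][0].shape)  # torch.Size([4, 192, 640]), Cityscapes label ids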
datasets/data_util.py ADDED
@@ -0,0 +1,307 @@
+ import os
+
+ from datasets.kitti_360.kitti_360_dataset import Kitti360Dataset
+ from datasets.kitti_odom.kitti_odometry_dataset import KittiOdometryDataset
+ from datasets.kitti_raw.kitti_raw_dataset import KittiRawDataset
+ from datasets.nyu_depth_v2.nyu_depth_v2_dataset import NYUDepthV2Dataset
+ from datasets.realestate10k.realestate10k_dataset import RealEstate10kDataset
+ from datasets.waymo.waymo_dataset import WaymoDataset
+
+
+ def make_datasets(config):
+     type = config.get("type", "KITTI_Raw")
+     if type == "KITTI_Odometry":
+         train_dataset = KittiOdometryDataset(
+             base_path=config["data_path"],
+             frame_count=config.get("data_fc", 1),
+             target_image_size=config.get("image_size", (128, 256)),
+             return_stereo=config.get("data_stereo", False),
+             sequences=config.get("train_sequences", ("00",)),
+             custom_pose_path=config.get("custom_pose_path", None),
+             keyframe_offset=0,  # -(config.get("data_fc", 1) // 2)
+         )
+         test_dataset = KittiOdometryDataset(
+             base_path=config["data_path"],
+             frame_count=config.get("data_fc", 1),
+             target_image_size=config.get("image_size", (128, 256)),
+             return_stereo=config.get("data_stereo", False),
+             sequences=config.get("val_sequences", ("00",)),
+             custom_pose_path=config.get("custom_pose_path", None),
+             keyframe_offset=0,  # -(config.get("data_fc", 1) // 2)
+         )
+         return train_dataset, test_dataset
+
+     elif type == "KITTI_Raw":
+         train_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "train_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+             color_aug=config.get("color_aug", False),
+         )
+         test_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "val_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+         )
+         return train_dataset, test_dataset
+
+     elif type == "KITTI_360":
+         if config.get("split_path", None) is None:
+             train_split_path = None
+             test_split_path = None
+         else:
+             train_split_path = os.path.join(config["split_path"], "train_files.txt")
+             test_split_path = os.path.join(config["split_path"], "val_files.txt")
+
+         train_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=train_split_path,
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 3),
+             return_stereo=config.get("data_stereo", True),
+             return_fisheye=config.get("data_fisheye", True),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", 1),
+             color_aug=config.get("color_aug", False),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         test_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=test_split_path,
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 3),
+             return_stereo=config.get("data_stereo", True),
+             return_fisheye=config.get("data_fisheye", True),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", 1),
+             dilation=config.get("dilation", 1),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         return train_dataset, test_dataset
+
+     elif type == "RealEstate10k":
+         train_dataset = RealEstate10kDataset(
+             data_path=config["data_path"],
+             split_path=None,
+             target_image_size=config.get("image_size", (256, 384)),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=0,  # -(config.get("data_fc", 1) // 2)
+             dilation=config.get("dilation", 10),
+             color_aug=config.get("color_aug", False),
+         )
+         test_dataset = RealEstate10kDataset(
+             data_path=config["data_path"],
+             split_path=os.path.join(config["split_path"], "val_files.txt"),
+             target_image_size=config.get("image_size", (256, 384)),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=0,  # -(config.get("data_fc", 1) // 2)
+             dilation=config.get("dilation", 10),
+             color_aug=False,
+         )
+         return train_dataset, test_dataset
+
+     elif type == "Waymo":
+         if config.get("split_path", None) is None:
+             train_split_path = None
+             test_split_path = None
+         else:
+             train_split_path = os.path.join(config["split_path"], "train_files.txt")
+             test_split_path = os.path.join(config["split_path"], "val_files.txt")
+
+         train_dataset = WaymoDataset(
+             data_path=config["data_path"],
+             mode="training",
+             split_path=train_split_path,
+             target_image_size=tuple(config.get("image_size", (320, 480))),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             return_45=config.get("return_45", True),
+             return_90=config.get("return_90", True),
+             offset_45=config.get("offset_45", 5),
+             offset_90=config.get("offset_90", 10),
+             dilation=config.get("dilation", 1),
+             color_aug=config.get("color_aug", True),
+             correct_exposure=config.get("correct_exposure", True),
+         )
+         test_dataset = WaymoDataset(
+             data_path=config["data_path"],
+             mode="validation",
+             split_path=test_split_path,
+             target_image_size=tuple(config.get("image_size", (320, 480))),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             return_45=config.get("return_45", True),
+             return_90=config.get("return_90", True),
+             offset_45=config.get("offset_45", 5),
+             offset_90=config.get("offset_90", 10),
+             dilation=config.get("dilation", 1),
+             color_aug=False,
+             return_depth=True,
+             correct_exposure=config.get("correct_exposure", True),
+         )
+         return train_dataset, test_dataset
+
+     elif type == "KITTI_Raw_DFT":
+         train_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "train_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+             color_aug=config.get("color_aug", False),
+         )
+         test_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "val_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+         )
+         return train_dataset, test_dataset
+
+     elif type == "KITTI_360_DFT":
+         if config.get("split_path", None) is None:
+             train_split_path = None
+             test_split_path = None
+         else:
+             train_split_path = os.path.join(config["split_path"], "train_files.txt")
+             test_split_path = os.path.join(config["split_path"], "val_files.txt")
+
+         train_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=train_split_path,
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 3),
+             return_stereo=config.get("data_stereo", True),
+             return_fisheye=config.get("data_fisheye", True),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             dilation=config.get("dilation", 1),
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", 1),
+             stereo_offset=config.get("stereo_offset", 1),
+             color_aug=config.get("color_aug", False),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         test_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=test_split_path,
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 3),
+             return_stereo=config.get("data_stereo", True),
+             return_fisheye=config.get("data_fisheye", True),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=config.get("keyframe_offset", 0),
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", [10])[0],  # takes the first offset, so all splits use the same value
+             stereo_offset=config.get("stereo_offset", [1])[0],  # keeps evaluation consistent with test and viz
+             dilation=config.get("dilation", 1),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         return train_dataset, test_dataset
+
+     else:
+         raise NotImplementedError(f"Unsupported dataset type: {type}")
+
+
+ def make_test_dataset(config):
+     type = config.get("type", "KITTI_Raw")
+     if type == "KITTI_Raw":
+         test_dataset = KittiRawDataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config["split_path"], "test_files.txt"),
+             target_image_size=config.get("image_size", (192, 640)),
+             return_depth=True,
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             keyframe_offset=0,
+         )
+         return test_dataset
+     elif type == "KITTI_360":
+         test_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config.get("split_path", None), "test_files.txt"),
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             return_fisheye=config.get("data_fisheye", False),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=0,
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", 1),
+             dilation=config.get("dilation", 1),
+             is_preprocessed=config.get("is_preprocessed", False),
+         )
+         return test_dataset
+     elif type == "KITTI_360_DFT":
+         test_dataset = Kitti360Dataset(
+             data_path=config["data_path"],
+             pose_path=config["pose_path"],
+             split_path=os.path.join(config.get("split_path", None), "test_files.txt"),
+             target_image_size=tuple(config.get("image_size", (192, 640))),
+             frame_count=config.get("data_fc", 1),
+             return_stereo=config.get("data_stereo", False),
+             return_fisheye=config.get("data_fisheye", False),
+             return_3d_bboxes=config.get("data_3d_bboxes", False),
+             return_segmentation=config.get("data_segmentation", False),
+             keyframe_offset=0,
+             fisheye_rotation=config.get("fisheye_rotation", 0),
+             fisheye_offset=config.get("fisheye_offset", [10])[0],  # takes the first offset, so all splits use the same value
+             stereo_offset=config.get("stereo_offset", [1])[0],  # keeps evaluation consistent with test and viz
+             dilation=config.get("dilation", 1),
+             is_preprocessed=config.get("is_preprocessed", False),
+             return_depth=True,
+         )
+         return test_dataset
+     elif type == "RealEstate10k":
+         test_dataset = RealEstate10kDataset(
+             data_path=config["data_path"],
+             split_path=os.path.join(config["split_path"], "test_files.txt"),
+             target_image_size=config.get("image_size", (256, 384)),
+             frame_count=config.get("data_fc", 2),
+             keyframe_offset=0,
+             dilation=config.get("dilation", 10),
+             color_aug=False,
+         )
+         return test_dataset
+     elif type == "NYU_Depth_V2":
+         test_dataset = NYUDepthV2Dataset(
+             data_path=config["data_path"],
+             target_image_size=config.get("image_size", (256, 384)),
+         )
+         return test_dataset
+     else:
+         raise NotImplementedError(f"Unsupported dataset type: {type}")
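To illustrate the keys these factories read, a minimal KITTI-360 sketch; the paths are placeholders, and every omitted key falls back to its `.get()` default above:

    from datasets.data_util import make_datasets

    config = {
        "type": "KITTI_360",
        "data_path": "/path/to/KITTI-360",
        "pose_path": "/path/to/KITTI-360/data_poses",
        "split_path": "datasets/kitti_360/splits/seg",
        "image_size": (192, 640),
        "data_fc": 3,  # frame count per sample
    }
    train_dataset, test_dataset = make_datasets(config)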
datasets/kitti_360/__init__.py ADDED
File without changes
datasets/kitti_360/annotation.py ADDED
@@ -0,0 +1,538 @@
+ #!/usr/bin/python
+ #
+
+ from __future__ import print_function, absolute_import, division
+
+ import glob
+ import json
+ import os
+ import struct
+ import xml.etree.ElementTree as ET
+ from collections import defaultdict
+ from collections import namedtuple
+
+ import numpy as np
+ from matplotlib import cm
+ from skimage import io, filters
+
+ # A point in a polygon
+ Point = namedtuple('Point', ['x', 'y'])
+
+
+ from abc import ABCMeta
+ from datasets.kitti_360.labels import labels, id2label, kittiId2label, name2label
+
+ MAX_N = 1000
+
+
+ def local2global(semanticId, instanceId):
+     globalId = semanticId * MAX_N + instanceId
+     if isinstance(globalId, np.ndarray):
+         return globalId.astype(int)  # np.int was removed in NumPy >= 1.24
+     else:
+         return int(globalId)
+
+
+ def global2local(globalId):
+     semanticId = globalId // MAX_N
+     instanceId = globalId % MAX_N
+     if isinstance(globalId, np.ndarray):
+         return semanticId.astype(int), instanceId.astype(int)
+     else:
+         return int(semanticId), int(instanceId)
+
+
+ annotation2global = defaultdict()
+
+
+ # Abstract base class for annotation objects
+ class KITTI360Object:
+     __metaclass__ = ABCMeta
+
+     def __init__(self):
+         # the label
+         self.label = ""
+
+         # colormap
+         self.cmap = cm.get_cmap('Set1')
+         self.cmap_length = 9
+
+     def getColor(self, idx):
+         if idx == 0:
+             return np.array([0, 0, 0])
+         return np.asarray(self.cmap(idx % self.cmap_length)[:3]) * 255.
+
+     def assignColor(self):
+         if self.semanticId >= 0:
+             self.semanticColor = id2label[self.semanticId].color
+         if self.instanceId > 0:
+             self.instanceColor = self.getColor(self.instanceId)
+         else:
+             self.instanceColor = self.semanticColor
+
+
+ # Class that contains the information of a single annotated object as 3D bounding box
+ class KITTI360Bbox3D(KITTI360Object):
+     # Constructor
+     def __init__(self):
+         KITTI360Object.__init__(self)
+         # the polygon as list of points
+         self.vertices = []
+         self.faces = []
+         self.lines = [[0, 5], [1, 4], [2, 7], [3, 6],
+                       [0, 1], [1, 3], [3, 2], [2, 0],
+                       [4, 5], [5, 7], [7, 6], [6, 4]]
+
+         # the ID of the corresponding object
+         self.semanticId = -1
+         self.instanceId = -1
+         self.annotationId = -1
+
+         # the window that contains the bbox
+         self.start_frame = -1
+         self.end_frame = -1
+
+         # timestamp of the bbox (-1 if static)
+         self.timestamp = -1
+
+         # projected vertices
+         self.vertices_proj = None
+         self.meshes = []
+
+         # name
+         self.name = ''
+
+     def __str__(self):
+         return self.name
+
+     def generateMeshes(self):
+         self.meshes = []
+         if self.vertices_proj:
+             for fidx in range(self.faces.shape[0]):
+                 self.meshes.append([Point(self.vertices_proj[0][int(x)], self.vertices_proj[1][int(x)]) for x in self.faces[fidx]])
+
+     def parseOpencvMatrix(self, node):
+         rows = int(node.find('rows').text)
+         cols = int(node.find('cols').text)
+         data = node.find('data').text.split(' ')
+
+         mat = []
+         for d in data:
+             d = d.replace('\n', '')
+             if len(d) < 1:
+                 continue
+             mat.append(float(d))
+         mat = np.reshape(mat, [rows, cols])
+         return mat
+
+     def parseVertices(self, child):
+         transform = self.parseOpencvMatrix(child.find('transform'))
+         R = transform[:3, :3]
+         T = transform[:3, 3]
+         vertices = self.parseOpencvMatrix(child.find('vertices'))
+         faces = self.parseOpencvMatrix(child.find('faces'))
+
+         vertices = np.matmul(R, vertices.transpose()).transpose() + T
+         self.vertices = vertices
+         self.faces = faces
+         self.R = R
+         self.T = T
+
+     def parseBbox(self, child):
+         semanticIdKITTI = int(child.find('semanticId').text)
+         self.semanticId = kittiId2label[semanticIdKITTI].id
+         self.instanceId = int(child.find('instanceId').text)
+         self.name = kittiId2label[semanticIdKITTI].name
+
+         self.start_frame = int(child.find('start_frame').text)
+         self.end_frame = int(child.find('end_frame').text)
+
+         self.timestamp = int(child.find('timestamp').text)
+
+         self.annotationId = int(child.find('index').text) + 1
+
+         global annotation2global
+         annotation2global[self.annotationId] = local2global(self.semanticId, self.instanceId)
+         self.parseVertices(child)
+
+     def parseStuff(self, child):
+         classmap = {'driveway': 'parking', 'ground': 'terrain', 'unknownGround': 'ground',
+                     'railtrack': 'rail track', 'bigPole': 'pole', 'unknownObject': 'unknown object',
+                     'smallPole': 'smallpole', 'trafficSign': 'traffic sign', 'trashbin': 'trash bin',
+                     'guardrail': 'guard rail', 'trafficLight': 'traffic light', 'pedestrian': 'person',
+                     'vendingmachine': 'vending machine', 'unknownConstruction': 'unknown construction',
+                     'unknownVehicle': 'unknown vehicle'}
+         label = child.find('label').text
+         if label in classmap.keys():
+             label = classmap[label]
+
+         self.start_frame = int(child.find('start_frame').text)
+         self.end_frame = int(child.find('end_frame').text)
+
+         self.timestamp = int(child.find('timestamp').text)
+
+         self.semanticId = name2label[label].id
+         self.name = label
+         self.parseVertices(child)
+
+
+ # Class that contains the information of the point cloud of a single frame
+ class KITTI360Point3D(KITTI360Object):
+     # Constructor
+     def __init__(self):
+         KITTI360Object.__init__(self)
+
+         self.vertices = []
+
+         self.vertices_proj = None
+
+         # the ID of the corresponding object
+         self.semanticId = -1
+         self.instanceId = -1
+         self.annotationId = -1
+
+         # name
+         self.name = ''
+
+         # color
+         self.semanticColor = None
+         self.instanceColor = None
+
+     def __str__(self):
+         return self.name
+
+     def generateMeshes(self):
+         pass
+
+
+ # The annotation of a whole image, including semantic and instance
+ class Annotation2D:
+     # Constructor
+     def __init__(self, colormap='Set1'):
+         # the width of that image and thus of the label image
+         self.imgWidth = 0
+         # the height of that image and thus of the label image
+         self.imgHeight = 0
+
+         self.instanceId = None
+         self.semanticId = None
+         self.instanceImg = None
+         self.semanticImg = None
+
+         # savedId = semanticId*N + instanceId
+         self.N = 1000
+
+         # colormap
+         self.cmap = cm.get_cmap(colormap)
+
+         if colormap == 'Set1':
+             self.cmap_length = 9
+         else:
+             raise ValueError("Colormap length needs to be specified!")
+
+     def getColor(self, idx):
+         if idx == 0:
+             return np.array([0, 0, 0])
+         return np.asarray(self.cmap(idx % self.cmap_length)[:3]) * 255.
+
+     # Load confidence map
+     def loadConfidence(self, imgPath):
+         self.confidenceMap = io.imread(imgPath)
+         self.confidenceMap = np.asarray(self.confidenceMap).astype(float) / 255.
+
+     # Load instance id
+     def loadInstance(self, imgPath, gtType='instance', toImg=True, contourType='instance', semanticCt=True, instanceCt=True):
+         instanceId = io.imread(imgPath)
+         self.instanceId = np.asarray(instanceId % self.N)
+         self.semanticId = np.asarray(instanceId // self.N)
+
+         if not toImg:
+             return
+
+         if gtType == 'semantic':
+             self.toSemanticImage()
+
+         elif gtType == 'instance':
+             self.toInstanceImage()
+
+         if semanticCt or instanceCt:
+             self.getBoundary()
+
+         if gtType == 'semantic' and semanticCt:
+             boundaryImg = self.toBoundaryImage(contourType=contourType, instanceOnly=False)
+             self.semanticImg = self.semanticImg * (1 - boundaryImg) + \
+                 np.ones_like(self.semanticImg) * boundaryImg * 255
+
+         if gtType == 'instance' and instanceCt:
+             boundaryImg = self.toBoundaryImage(contourType=contourType, instanceOnly=True)
+             self.instanceImg = self.instanceImg * (1 - boundaryImg) + \
+                 np.ones_like(self.instanceImg) * boundaryImg * 255
+
+     def toSemanticImage(self):
+         self.semanticImg = np.zeros((self.semanticId.size, 3))
+         for label in labels:
+             mask = self.semanticId == label.id
+             mask = mask.flatten()
+             self.semanticImg[mask] = np.asarray(label.color)
+         self.semanticImg = self.semanticImg.reshape(*self.semanticId.shape, 3)
+
+     def toInstanceImage(self):
+         self.instanceImg = np.zeros((self.instanceId.size, 3))
+
+         uniqueId = np.unique(self.instanceId)
+         for uid in uniqueId:
+             mask = self.instanceId == uid
+             mask = mask.flatten()
+             self.instanceImg[mask] = np.asarray(self.getColor(uid))
+
+         self.instanceImg = self.instanceImg.reshape(*self.instanceId.shape, 3)
+
+     def getBoundary(self):
+         # semantic contours
+         uniqueId = np.unique(self.semanticId)
+         self.semanticContours = {}
+         for uid in uniqueId:
+             mask = (self.semanticId == uid).astype(np.uint8) * 255
+             mask_filter = filters.laplace(mask)
+             self.semanticContours[uid] = np.expand_dims(np.abs(mask_filter) > 0, 2)
+
+         # instance contours
+         globalId = local2global(self.semanticId, self.instanceId)
+         uniqueId = np.unique(globalId)
+         self.instanceContours = {}
+         for uid in uniqueId:
+             mask = (globalId == uid).astype(np.uint8) * 255
+             mask_filter = filters.laplace(mask)
+             self.instanceContours[uid] = np.expand_dims(np.abs(mask_filter) > 0, 2)
+
+     def toBoundaryImage(self, contourType='instance', instanceOnly=True):
+         if contourType == 'semantic':
+             contours = self.semanticContours
+             assert instanceOnly == False
+         elif contourType == 'instance':
+             contours = self.instanceContours
+         else:
+             raise ValueError("Contour type can only be 'semantic' or 'instance'!")
+
+         if not instanceOnly:
+             boundaryImg = [contours[k] for k in contours.keys()]
+         else:
+             boundaryImg = [contours[k] for k in contours.keys() if global2local(k)[1] != 0]
+         boundaryImg = np.sum(np.asarray(boundaryImg), axis=0)
+         boundaryImg = boundaryImg > 0
+         return boundaryImg
+
+
+ class Annotation2DInstance:
+     def __init__(self, gtPath, cam=0):
+
+         # trace the instances in all images
+         self.instanceDict = defaultdict(list)
+
+         instanceDictCached = os.path.join(gtPath, 'instanceDict.json')
+         print(instanceDictCached)
+         if os.path.isfile(instanceDictCached) and os.path.getsize(instanceDictCached) > 0:
+             cachedDict = json.load(open(instanceDictCached))
+             for k, v in cachedDict.items():
+                 self.instanceDict[int(k)] = v
+             return
+
+         obj = Annotation2D()
+
+         gtPaths = glob.glob(os.path.join(gtPath, 'instance', '*.png'))
+         print(f'Found {len(gtPaths)} label images...')
+
+         for i, imgPath in enumerate(gtPaths):
+             if i % 1000 == 0:
+                 print(f'Processed {i}/{len(gtPaths)} label images...')
+             obj.loadInstance(imgPath, toImg=False)
+             globalId = local2global(obj.semanticId, obj.instanceId)
+             globalIdUnique = np.unique(globalId)
+             for idx in globalIdUnique:
+                 self.instanceDict[int(idx)].append(os.path.basename(imgPath))
+
+         json.dump(self.instanceDict, open(instanceDictCached, 'w'))
+
+     # returns the paths that contain the specific instance
+     def __call__(self, semanticId, instanceId):
+         globalId = local2global(semanticId, instanceId)
+         return self.instanceDict[globalId]
+
+
+ # Meta class for KITTI360Bbox3D
+ class Annotation3D:
+     # Constructor
+     def __init__(self, labelDir='', sequence=''):
+
+         labelPath = glob.glob(os.path.join(labelDir, '*', '%s.xml' % sequence))  # train or test
+         if len(labelPath) != 1:
+             raise RuntimeError('%s does not exist! Please specify KITTI360_DATASET in your environment path.' % labelPath)
+         else:
+             labelPath = labelPath[0]
+             print('Loading %s...' % labelPath)
+
+         self.init_instance(labelPath)
+
+     def init_instance(self, labelPath):
+         # load annotation
+         tree = ET.parse(labelPath)
+         root = tree.getroot()
+
+         self.objects = defaultdict(dict)
+
+         self.num_bbox = 0
+
+         for child in root:
+             if child.find('transform') is None:
+                 continue
+             obj = KITTI360Bbox3D()
+             obj.parseBbox(child)
+             globalId = local2global(obj.semanticId, obj.instanceId)
+             self.objects[globalId][obj.timestamp] = obj
+             self.num_bbox += 1
+
+         globalIds = np.asarray(list(self.objects.keys()))
+         semanticIds, instanceIds = global2local(globalIds)
+         for label in labels:
+             if label.hasInstances:
+                 print(f'{label.name:<30}:\t {(semanticIds == label.id).sum()}')
+         print(f'Loaded {len(globalIds)} instances')
+         print(f'Loaded {self.num_bbox} boxes')
+
+     def __call__(self, semanticId, instanceId, timestamp=None):
+         globalId = local2global(semanticId, instanceId)
+         if globalId in self.objects.keys():
+             # static object
+             if len(self.objects[globalId].keys()) == 1:
+                 if -1 in self.objects[globalId].keys():
+                     return self.objects[globalId][-1]
+                 else:
+                     return None
+             # dynamic object
+             else:
+                 return self.objects[globalId][timestamp]
+         else:
+             return None
+
+
+ class Annotation3DPly:
+     # parse fused 3D point cloud
+     def __init__(self, labelDir='', sequence='', isLabeled=True, isDynamic=False, showStatic=True):
+
+         if isLabeled and not isDynamic:
+             # x y z r g b semanticId instanceId isVisible confidence
+             self.fmt = '=fffBBBiiBf'
+             self.fmt_len = 28
+         elif isLabeled and isDynamic:
+             # x y z r g b semanticId instanceId isVisible timestamp confidence
+             self.fmt = '=fffBBBiiBif'
+             self.fmt_len = 32
+         elif not isLabeled and not isDynamic:
+             # x y z r g b
+             self.fmt = '=fffBBBB'
+             self.fmt_len = 16
+         else:
+             raise RuntimeError('Invalid binary format!')
+
+         # True for training data, False for testing data
+         self.isLabeled = isLabeled
+         # True for dynamic data, False for static data
+         self.isDynamic = isDynamic
+         # True for inspecting static data, False for inspecting dynamic data
+         self.showStatic = showStatic
+
+         pcdFolder = 'static' if self.showStatic else 'dynamic'
+         trainTestDir = 'train' if self.isLabeled else 'test'
+         self.pcdFileList = sorted(glob.glob(os.path.join(labelDir, trainTestDir, sequence, pcdFolder, '*.ply')))
+
+         print('Found %d ply files in %s' % (len(self.pcdFileList), sequence))
+
+     def readBinaryPly(self, pcdFile, n_pts=None):
+
+         with open(pcdFile, 'rb') as f:
+             plyData = f.readlines()
+
+         headLine = plyData.index(b'end_header\n') + 1
+         plyData = plyData[headLine:]
+         plyData = b"".join(plyData)
+
+         n_pts_loaded = len(plyData) / self.fmt_len
+         # sanity check
+         if n_pts:
+             assert n_pts_loaded == n_pts
+         n_pts_loaded = int(n_pts_loaded)
+
+         data = []
+         for i in range(n_pts_loaded):
+             pts = struct.unpack(self.fmt, plyData[i * self.fmt_len:(i + 1) * self.fmt_len])
+             data.append(pts)
+         data = np.asarray(data)
+
+         return data
+
+     def writeBinaryPly(self, pcdFile, data):
+         fmt = '=fffBBBiiB'
+         fmt_len = 24
+         n_pts = data.shape[0]
+
+         with open(pcdFile, 'wb') as f:
+             f.write(b'ply\n')
+             f.write(b'format binary_little_endian 1.0\n')
+             f.write(b'comment author Yiyi Liao\n')
+             f.write(b'element vertex %d\n' % n_pts)
+             f.write(b'property float x\n')
+             f.write(b'property float y\n')
+             f.write(b'property float z\n')
+             f.write(b'property uchar red\n')
+             f.write(b'property uchar green\n')
+             f.write(b'property uchar blue\n')
+             f.write(b'property int semantic\n')
+
+
+ class Annotation3DInstance(object):
+     instance_id = 0
+     labelId = 0
+     vert_count = 0
+     med_dist = -1
+     dist_conf = 0.0
+
+     def __init__(self, mesh_vert_instances, instance_id):
+         if instance_id == -1:
+             return
+         self.instance_id = int(instance_id)
+         self.labelId = int(self.get_labelId(instance_id))
+         self.vert_count = int(self.get_instance_verts(mesh_vert_instances, instance_id))
+
+     def get_labelId(self, instance_id):
+         return int(instance_id // 1000)
+
+     def get_instance_verts(self, mesh_vert_instances, instance_id):
+         return (mesh_vert_instances == instance_id).sum()
+
+     def to_json(self):
+         return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
+
+     def to_dict(self):
+         dict = {}
+         dict["instance_id"] = self.instance_id
+         dict["labelId"] = self.labelId
+         dict["vert_count"] = self.vert_count
+         dict["med_dist"] = self.med_dist
+         dict["dist_conf"] = self.dist_conf
+         return dict
+
+     def from_json(self, data):
+         self.instance_id = int(data["instance_id"])
+         self.labelId = int(data["labelId"])
+         self.vert_count = int(data["vert_count"])
+         if "med_dist" in data:
+             self.med_dist = float(data["med_dist"])
+             self.dist_conf = float(data["dist_conf"])
+
+     def __str__(self):
+         return "(" + str(self.instance_id) + ")"
+
+
+ # a dummy example
+ if __name__ == "__main__":
+     ann = Annotation3D()
+
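The id helpers at the top of this file pack (semanticId, instanceId) pairs in base MAX_N = 1000; a short round-trip sketch with arbitrary ids:

    from datasets.kitti_360.annotation import local2global, global2local

    gid = local2global(26, 42)      # semantic class 26, instance 42
    assert gid == 26 * 1000 + 42
    assert global2local(gid) == (26, 42)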
datasets/kitti_360/compute_kitti_360_bbox_split.py ADDED
@@ -0,0 +1,110 @@
+ import argparse
+
+ from pathlib import Path
+
+ DRY_RUN = False
+
+ CUT = [7, 9, 10, 15, 19, 31, 43, 69, 87, 107, 118, 154, 156, 167, 168, 170, 171, 172, 173, 174, 178, 179, 180, 181,
+        182, 183, 184, 185, 187, 188, 193, 194, 195, 196, 201, 202, 203, 209, 210, 212, 213, 214, 215, 216, 217, 218,
+        219, 220, 221, 222, 224, 225, 226, 229, 230, 231, 234, 235, 236, 237, 238, 256, 257, 258, 267, 278, 283, 293,
+        294, 295, 296, 297, 298, 299, 310, 315, 317, 318, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333,
+        334, 335, 336, 337, 340, 341, 349, 353, 354, 361, 362, 365, 366, 368, 371, 372, 376, 380, 386, 387, 394, 402,
+        403, 404, 411, 414, 415, 416, 420, 438, 441, 448, 452, 456, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482,
+        484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 519, 520, 554, 562, 593, 594,
+        596]
+
+
+ def check_integrity(data_path, seq, img_id):
+     persp = data_path / "data_2d_raw" / seq / "image_00" / "data_rect" / f"{img_id:010d}.png"
+     fish = data_path / "data_2d_raw" / seq / "image_02" / "data_rgb" / f"{img_id:010d}.png"
+
+     return fish.exists() and persp.exists()
+
+
+ def main():
+     parser = argparse.ArgumentParser("KITTI-360 Bbox Split")
+     parser.add_argument("--data_path", "-d", type=str)
+     parser.add_argument("--out_path", "-o", type=str)
+     parser.add_argument("--offset", type=int, default=20)
+
+     args = parser.parse_args()
+
+     data_path = Path(args.data_path)
+     out_path = Path(args.out_path)
+     offset = args.offset
+
+     print("Setting up folders...")
+     Path(out_path).mkdir(parents=True, exist_ok=True)
+
+     segmentation_train_file = data_path / "data_2d_semantics" / "train" / "2013_05_28_drive_train_frames.txt"
+     segmentation_val_file = data_path / "data_2d_semantics" / "train" / "2013_05_28_drive_val_frames.txt"
+
+     with open(segmentation_train_file, "r") as f:
+         train_lines = f.readlines()
+     with open(segmentation_val_file, "r") as f:
+         val_lines = f.readlines()
+
+     train_files = []
+     val_files = []
+     test_files = []
+
+     invalid = 0
+
+     for i in range(len(train_lines)):
+         parts = train_lines[i].split(" ")
+         img_path = parts[0]
+
+         parts = img_path.split("/")
+         sequence = parts[1]
+         img_id = int(parts[-1][-14:-4])
+
+         if not check_integrity(data_path, sequence, img_id):
+             invalid += 1
+             continue
+
+         train_files.append(f"{sequence} {img_id:010d} l")
+         train_files.append(f"{sequence} {img_id:010d} r")
+
+     for i in range(len(val_lines)):
+         parts = val_lines[i].split(" ")
+         img_path = parts[0]
+         seg_path = parts[1][:-1]
+
+         parts = img_path.split("/")
+         sequence = parts[1]
+         img_id = int(parts[-1][-14:-4])
+
+         is_test = (i % offset) == 0
+
+         if not check_integrity(data_path, sequence, img_id):
+             invalid += 1
+             continue
+
+         if not is_test:
+             val_files.append(f"{sequence} {img_id:010d} l")
+         else:
+             test_files.append(f"{sequence} {img_id:010d} l")
+
+     print(f"Found {len(train_files)} train, {len(val_files)} val, and {len(test_files)} test files.")
+     print(f"Found {invalid} invalid frames.")
+
+     test_files = [s for i, s in enumerate(test_files) if i not in CUT]
+
+     print(f"{len(CUT)} test files removed. {len(test_files)} remaining.")
+
+     train_file = out_path / "train_files.txt"
+     val_file = out_path / "val_files.txt"
+     test_file = out_path / "test_files.txt"
+
+     with open(train_file, "w") as f:
+         f.writelines("\n".join(train_files))
+
+     with open(val_file, "w") as f:
+         f.writelines("\n".join(val_files))
+
+     with open(test_file, "w") as f:
+         f.writelines("\n".join(test_files))
+
+
+ if __name__ == "__main__":
+     main()
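Each emitted split line has the form `<sequence> <10-digit frame id> <l|r>`, matching what `Kitti360Dataset._load_split` parses; a short sketch with a made-up frame id:

    line = "2013_05_28_drive_0000_sync 0000000042 l"
    sequence, frame, side = line.split(" ")
    assert int(frame) == 42 and side in ("l", "r")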
datasets/kitti_360/kitti_360_dataset.py ADDED
@@ -0,0 +1,1263 @@
+ import math
+ import os
+ import time
+ import xml.etree.ElementTree as ET
+ from collections import Counter, defaultdict
+ from pathlib import Path
+ from typing import Optional
+
+ import cv2
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ import yaml
+ from scipy.spatial.transform import Rotation
+ from torch.utils.data import Dataset
+ from torchvision.transforms import ColorJitter
+
+ from datasets.kitti_360.annotation import KITTI360Bbox3D
+ from scenedino.common.augmentation import get_color_aug_fn
+
+ import omegaconf
+
+
+ class FisheyeToPinholeSampler:
+     def __init__(self, K_target, target_image_size, calibs, rotation=None):
+         self._compute_transform(K_target, target_image_size, calibs, rotation)
+
+     def _compute_transform(self, K_target, target_image_size, calibs, rotation=None):
+         x = (
+             torch.linspace(-1, 1, target_image_size[1])
+             .view(1, -1)
+             .expand(target_image_size)
+         )
+         y = (
+             torch.linspace(-1, 1, target_image_size[0])
+             .view(-1, 1)
+             .expand(target_image_size)
+         )
+         z = torch.ones_like(x)
+         xyz = torch.stack((x, y, z), dim=-1).view(-1, 3)
+
+         # Unproject
+         xyz = (torch.inverse(torch.tensor(K_target)) @ xyz.T).T
+
+         if rotation is not None:
+             xyz = (torch.tensor(rotation) @ xyz.T).T
+
+         # Backproject into fisheye
+         xyz = xyz / torch.norm(xyz, dim=-1, keepdim=True)
+         x = xyz[:, 0]
+         y = xyz[:, 1]
+         z = xyz[:, 2]
+
+         xi_src = calibs["mirror_parameters"]["xi"]
+         x = x / (z + xi_src)
+         y = y / (z + xi_src)
+
+         k1 = calibs["distortion_parameters"]["k1"]
+         k2 = calibs["distortion_parameters"]["k2"]
+
+         r = x * x + y * y
+         factor = 1 + k1 * r + k2 * r * r
+         x = x * factor
+         y = y * factor
+
+         gamma0 = calibs["projection_parameters"]["gamma1"]
+         gamma1 = calibs["projection_parameters"]["gamma2"]
+         u0 = calibs["projection_parameters"]["u0"]
+         v0 = calibs["projection_parameters"]["v0"]
+
+         x = x * gamma0 + u0
+         y = y * gamma1 + v0
+
+         xy = torch.stack((x, y), dim=-1).view(1, *target_image_size, 2)
+         self.sample_pts = xy
+
+     def resample(self, img):
+         img = img.unsqueeze(0)
+         resampled_img = F.grid_sample(img, self.sample_pts, align_corners=True).squeeze(0)
+         return resampled_img
+
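A shape-only usage sketch of the sampler above; every calibration number here is a placeholder, not a real KITTI-360 value:

    import torch

    from datasets.kitti_360.kitti_360_dataset import FisheyeToPinholeSampler

    calibs = {
        "mirror_parameters": {"xi": 2.2},
        "distortion_parameters": {"k1": 0.01, "k2": -0.002},
        "projection_parameters": {"gamma1": 1300.0, "gamma2": 1300.0,
                                  "u0": 700.0, "v0": 700.0},
    }
    K_target = [[0.8, 0.0, 0.0], [0.0, 1.6, 0.0], [0.0, 0.0, 1.0]]
    sampler = FisheyeToPinholeSampler(K_target, (192, 640), calibs)
    pinhole = sampler.resample(torch.rand(3, 1400, 1400))  # -> (3, 192, 640)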
83
+
84
+ # TODO: probably move to KITTI-360 dataset
85
+ # The KITTI 360 cameras have a 5 degrees negative inclination. We need to account for that.
86
+ cam_incl_adjust = torch.tensor(
87
+ [
88
+ [1.0000000, 0.0000000, 0.0000000, 0],
89
+ [0.0000000, 0.9961947, 0.0871557, 0],
90
+ [0.0000000, -0.0871557, 0.9961947, 0],
91
+ [0.0000000, 000000000, 0.0000000, 1],
92
+ ],
93
+ dtype=torch.float32,
94
+ ).view(1, 1, 4, 4)
95
+
96
+
97
+ def get_pts(x_range, y_range, z_range, ppm, ppm_y, y_res=None): ## ppm:=pts_per_meter
98
+ x_res = abs(int((x_range[1] - x_range[0]) * ppm))
99
+ if y_res is None:
100
+ y_res = abs(int((y_range[1] - y_range[0]) * ppm_y))
101
+ z_res = abs(int((z_range[1] - z_range[0]) * ppm))
102
+ x = (
103
+ torch.linspace(x_range[0], x_range[1], x_res)
104
+ .view(1, 1, x_res)
105
+ .expand(y_res, z_res, -1)
106
+ )
107
+ z = (
108
+ torch.linspace(z_range[0], z_range[1], z_res)
109
+ .view(1, z_res, 1)
110
+ .expand(y_res, -1, x_res)
111
+ )
112
+ if y_res == 1:
113
+ y = (
114
+ torch.tensor([y_range[0] * 0.5 + y_range[1] * 0.5])
115
+ .view(y_res, 1, 1)
116
+ .expand(-1, z_res, x_res)
117
+ )
118
+ else:
119
+ y = (
120
+ torch.linspace(y_range[0], y_range[1], y_res)
121
+ .view(y_res, 1, 1)
122
+ .expand(-1, z_res, x_res)
123
+ )
124
+ xyz = torch.stack((x, y, z), dim=-1)
125
+
126
+ return xyz, (x_res, y_res, z_res)
127
+
128
+
129
+ # This function takes all points between min_y and max_y and projects them into the x-z plane.
130
+ # To avoid cases where there are no points at the top end, we consider also points that are beyond the maximum z distance.
131
+ # The points are then converted to polar coordinates and sorted by angle.
132
+
133
+
134
+ def get_lidar_slices(point_clouds, velo_poses, y_range, y_res, max_dist):
135
+ slices = []
136
+ ys = torch.linspace(y_range[0], y_range[1], y_res)
137
+ if y_res > 1:
138
+ slice_height = ys[1] - ys[0]
139
+ else:
140
+ slice_height = 0
141
+ n_bins = 360
142
+
143
+ for y in ys:
144
+ if y_res == 1:
145
+ min_y = y
146
+ max_y = y_range[-1]
147
+ else:
148
+ min_y = y - slice_height / 2
149
+ max_y = y + slice_height / 2
150
+
151
+ slice = []
152
+
153
+ for pc, velo_pose in zip(point_clouds, velo_poses):
154
+ pc_world = (velo_pose @ pc.T).T
155
+
156
+ mask = ((pc_world[:, 1] >= min_y) & (pc_world[:, 1] <= max_y)) | (
157
+ torch.norm(pc_world[:, :3], dim=-1) >= max_dist
158
+ )
159
+
160
+ slice_points = pc[mask, :2]
161
+
162
+ angles = torch.atan2(slice_points[:, 1], slice_points[:, 0])
163
+ dists = torch.norm(slice_points, dim=-1)
164
+
165
+ slice_points_polar = torch.stack((angles, dists), dim=1)
166
+ # Sort by angles for fast lookup
167
+ slice_points_polar = slice_points_polar[torch.sort(angles)[1], :]
168
+
169
+ slice_points_polar_binned = torch.zeros_like(slice_points_polar[:n_bins, :])
170
+ bin_borders = torch.linspace(
171
+ -math.pi, math.pi, n_bins + 1, device=slice_points_polar.device
172
+ )
173
+
174
+ dist = slice_points_polar[0, 1]
175
+
176
+ # To reduce noise, we bin the lidar points into bins of 1deg and then take the minimum distance per bin.
177
+ border_is = torch.searchsorted(slice_points_polar[:, 0], bin_borders)
178
+
179
+ for i in range(n_bins):
180
+ left_i, right_i = border_is[i], border_is[i + 1]
181
+ angle = (bin_borders[i] + bin_borders[i + 1]) * 0.5
182
+ if right_i > left_i:
183
+ dist = torch.min(slice_points_polar[left_i:right_i, 1])
184
+ slice_points_polar_binned[i, 0] = angle
185
+ slice_points_polar_binned[i, 1] = dist
186
+
187
+ slice_points_polar = slice_points_polar_binned
188
+
189
+ # Append first element to last to have full 360deg coverage
190
+ slice_points_polar = torch.cat(
191
+ (
192
+ torch.tensor(
193
+ [
194
+ [
195
+ slice_points_polar[-1, 0] - math.pi * 2,
196
+ slice_points_polar[-1, 1],
197
+ ]
198
+ ],
199
+ device=slice_points_polar.device,
200
+ ),
201
+ slice_points_polar,
202
+ torch.tensor(
203
+ [
204
+ [
205
+ slice_points_polar[0, 0] + math.pi * 2,
206
+ slice_points_polar[0, 1],
207
+ ]
208
+ ],
209
+ device=slice_points_polar.device,
210
+ ),
211
+ ),
212
+ dim=0,
213
+ )
214
+
215
+ slice.append(slice_points_polar)
216
+
217
+ slices.append(slice)
218
+
219
+ return slices
220
+
221
+
222
+ def check_occupancy(pts, slices, velo_poses, min_dist=3):
223
+ is_occupied = torch.ones_like(pts[:, 0])
224
+ is_visible = torch.zeros_like(pts[:, 0], dtype=torch.bool)
225
+
226
+ thresh = (len(slices[0]) - 2) / len(slices[0])
227
+
228
+ pts = torch.cat((pts, torch.ones_like(pts[:, :1])), dim=-1)
229
+
230
+ world_to_velos = torch.inverse(velo_poses)
231
+
232
+ step = pts.shape[0] // len(slices)
233
+
234
+ for i, slice in enumerate(slices):
235
+ for j, (lidar_polar, world_to_velo) in enumerate(zip(slice, world_to_velos)):
236
+ pts_velo = (world_to_velo @ pts[i * step : (i + 1) * step, :].T).T
237
+
238
+ # Convert query points to polar coordinates in velo space
239
+ angles = torch.atan2(pts_velo[:, 1], pts_velo[:, 0])
240
+ dists = torch.norm(pts_velo, dim=-1)
241
+
242
+ indices = torch.searchsorted(lidar_polar[:, 0].contiguous(), angles)
243
+
244
+ left_angles = lidar_polar[indices - 1, 0]
245
+ right_angles = lidar_polar[indices, 0]
246
+
247
+ left_dists = lidar_polar[indices - 1, 1]
248
+ right_dists = lidar_polar[indices, 1]
249
+
250
+ interp = (angles - left_angles) / (right_angles - left_angles)
251
+ surface_dist = left_dists * (1 - interp) + right_dists * interp
252
+
253
+ is_occupied_velo = (dists > surface_dist) | (dists < min_dist)
254
+
255
+ is_occupied[i * step : (i + 1) * step] += is_occupied_velo.float()
256
+
257
+ if j == 0:
258
+ is_visible[i * step : (i + 1) * step] |= ~is_occupied_velo
259
+
260
+ is_occupied /= len(slices[0])
261
+
262
+ is_occupied = is_occupied > thresh
263
+
264
+ return is_occupied, is_visible
265
+
266
+
267
+ class KITTIVelodyn:
268
+ def __init__(self, config) -> None:
269
+ self.config = config
270
+ self.occ_pts, self.yd = self._gen_pts()
271
+
272
+ def _gen_pts(self) -> torch.Tensor:
273
+ q_pts, (xd, yd, zd) = get_pts(
274
+ self.x_range, self.y_range, self.z_range, self.ppm, self.ppm_y, self.y_res
275
+ )
276
+ return q_pts, yd
277
+
278
+ def check_occupancy(self, points_all, velo_poses):
279
+ slices = get_lidar_slices(
280
+ points_all,
281
+ velo_poses,
282
+ self.config["y_range"],
283
+ self.yd,
284
+ (self.self.config["z_range"][0] ** 2 + self.self.config["x_range"][0] ** 2)
285
+ ** 0.5,
286
+ )
287
+ is_occupied, is_visible = check_occupancy(self.occ_pts, slices, velo_poses)
288
+
289
+ return is_occupied, is_visible
290
+
291
+
292
+ class Kitti360Dataset(Dataset):
293
+ def __init__(
294
+ self,
295
+ data_path: str,
296
+ pose_path: str,
297
+ split_path: Optional[str],
298
+ target_image_size=(192, 640),
299
+ return_stereo=False,
300
+ return_depth=False,
301
+ return_fisheye=True, ## default: True
302
+ return_3d_bboxes=False,
303
+ return_segmentation=False,
304
+ frame_count=2,
305
+ keyframe_offset=0,
306
+ dilation=1,
307
+ fisheye_rotation=0,
308
+ fisheye_offset=0,
309
+ stereo_offset=0,
310
+ eigen_depth=True,
311
+ color_aug=False,
312
+ is_preprocessed=False,
313
+ kitti_velodyn: KITTIVelodyn | None = None,
314
+ ):
315
+ self.data_path = data_path
316
+ self.pose_path = pose_path
317
+ self.split_path = split_path
318
+ self.target_image_size = target_image_size
319
+ self.return_stereo = return_stereo
320
+ self.return_fisheye = return_fisheye
321
+ self.return_depth = return_depth
322
+ self.return_3d_bboxes = return_3d_bboxes
323
+ self.return_segmentation = return_segmentation
324
+ self.frame_count = frame_count
325
+ self.dilation = dilation
326
+ self.fisheye_rotation = fisheye_rotation
327
+ self.fisheye_offset = fisheye_offset
328
+ self.stereo_offset = stereo_offset
329
+ self.keyframe_offset = keyframe_offset
330
+ self.eigen_depth = eigen_depth
331
+ self.color_aug = color_aug
332
+ self.is_preprocessed = is_preprocessed
333
+ self.kitti_velodyn = kitti_velodyn
334
+
335
+ if isinstance(self.fisheye_rotation, float) or isinstance(
336
+ self.fisheye_rotation, int
337
+ ):
338
+ self.fisheye_rotation = (0, self.fisheye_rotation)
339
+ self.fisheye_rotation = tuple(self.fisheye_rotation)
340
+
341
+ # if additional_random_front_offset and not self.random_fisheye_offset:
342
+ # raise ValueError("Random Fisheye Offset needs to be active for additional random front offset!")
343
+ # else:
344
+ # self.additional_random_front_offset = additional_random_front_offset
345
+
346
+ # Support random fisheye offset
347
+ if type(self.fisheye_offset) == int:
348
+ self.random_fisheye_offset = False
349
+ self.fisheye_offset = (self.fisheye_offset,)
350
+ elif type(self.fisheye_offset) in [
351
+ tuple,
352
+ list,
353
+ omegaconf.listconfig.ListConfig,
354
+ ]:
355
+ self.random_fisheye_offset = True
356
+ self.fisheye_offset = tuple(sorted(self.fisheye_offset))
357
+ else:
358
+ raise ValueError(
359
+ f"Invalid datatype for fisheye offset: {type(self.fisheye_offset)}"
360
+ )
361
+
362
+ if type(self.stereo_offset) == int:
363
+ self.random_stereo_offset = False
364
+ self.stereo_offset = (self.stereo_offset,)
365
+ elif type(self.stereo_offset) in [tuple, list, omegaconf.listconfig.ListConfig]:
366
+ self.random_stereo_offset = True
367
+ self.stereo_offset = tuple(sorted(self.stereo_offset))
368
+ else:
369
+ raise ValueError(
370
+ f"Invalid datatype for fisheye offset: {type(self.stereo_offset)}"
371
+ )
372
+
373
+ self._sequences = self._get_sequences(self.data_path)
374
+
375
+ self._calibs = self._load_calibs(self.data_path, self.fisheye_rotation)
376
+ self._resampler_02, self._resampler_03 = self._get_resamplers(
377
+ self._calibs, self._calibs["K_fisheye"], self.target_image_size
378
+ )
379
+ self._img_ids, self._poses = self._load_poses(self.pose_path, self._sequences)
380
+ self._left_offset = (
381
+ (self.frame_count - 1) // 2 + self.keyframe_offset
382
+ ) * self.dilation
383
+
384
+ self._perspective_folder = (
385
+ "data_rect"
386
+ if not self.is_preprocessed
387
+ else f"data_{self.target_image_size[0]}x{self.target_image_size[1]}"
388
+ )
389
+ self._fisheye_folder = (
390
+ "data_rgb"
391
+ if not self.is_preprocessed
392
+ else f"data_{self.target_image_size[0]}x{self.target_image_size[1]}_{self.fisheye_rotation[0]}x{self.fisheye_rotation[1]}"
393
+ )
394
+
395
+ if self.split_path is not None:
396
+ self._datapoints = self._load_split(self.split_path, self._img_ids)
397
+ elif self.return_segmentation:
398
+ self._datapoints = self._semantics_split(
399
+ self._sequences, self.data_path, self._img_ids
400
+ )
401
+ else:
402
+ self._datapoints = self._full_split(
403
+ self._sequences, self._img_ids, self.check_file_integrity
404
+ )
405
+
406
+ if self.return_3d_bboxes:
407
+ self._3d_bboxes = self._load_3d_bboxes(
408
+ Path(data_path) / "data_3d_bboxes" / "train_full", self._sequences
409
+ )
410
+
411
+ if self.return_segmentation:
412
+ # Segmentations are only provided for the left camera
413
+ self._datapoints = [dp for dp in self._datapoints if not dp[2]]
414
+
415
+ self._skip = 0
416
+ self.length = len(self._datapoints)
417
+
418
+ def check_file_integrity(self, seq, id):
419
+ dp = Path(self.data_path)
420
+ image_00 = dp / "data_2d_raw" / seq / "image_00" / self._perspective_folder
421
+ image_01 = dp / "data_2d_raw" / seq / "image_01" / self._perspective_folder
422
+ image_02 = dp / "data_2d_raw" / seq / "image_02" / self._fisheye_folder
423
+ image_03 = dp / "data_2d_raw" / seq / "image_03" / self._fisheye_folder
424
+
425
+ seq_len = self._img_ids[seq].shape[0]
426
+
427
+ ids = [id] + [
428
+ max(min(i, seq_len - 1), 0)
429
+ for i in range(
430
+ id - self._left_offset,
431
+ id - self._left_offset + self.frame_count * self.dilation,
432
+ self.dilation,
433
+ )
434
+ if i != id
435
+ ]
436
+ ids_fish = [max(min(id + self.fisheye_offset, seq_len - 1), 0)] + [
437
+ max(min(i, seq_len - 1), 0)
438
+ for i in range(
439
+ id + self.fisheye_offset - self._left_offset,
440
+ id
441
+ + self.fisheye_offset
442
+ - self._left_offset
443
+ + self.frame_count * self.dilation,
444
+ self.dilation,
445
+ )
446
+ if i != id + self.fisheye_offset
447
+ ]
448
+
449
+ img_ids = [self.get_img_id_from_id(seq, id) for id in ids]
450
+ img_ids_fish = [self.get_img_id_from_id(seq, id) for id in ids_fish]
451
+
452
+ for img_id in img_ids:
453
+ if not (
454
+ (image_00 / f"{img_id:010d}.png").exists()
455
+ and (image_01 / f"{img_id:010d}.png").exists()
456
+ ):
457
+ return False
458
+ if self.return_fisheye:
459
+ for img_id in img_ids_fish:
460
+ if not (
461
+ (image_02 / f"{img_id:010d}.png").exists()
462
+ and (image_03 / f"{img_id:010d}.png").exists()
463
+ ):
464
+ return False
465
+ return True
466
+
467
+ @staticmethod
468
+ def _get_sequences(data_path):
469
+ all_sequences = []
470
+
471
+ seqs_path = Path(data_path) / "data_2d_raw"
472
+ for seq in seqs_path.iterdir():
473
+ if not seq.is_dir():
474
+ continue
475
+ all_sequences.append(seq.name)
476
+
477
+ return all_sequences
478
+
479
+ @staticmethod
480
+ def _full_split(sequences, img_ids, check_integrity):
481
+ datapoints = []
482
+ for seq in sorted(sequences):
483
+ ids = [id for id in range(len(img_ids[seq])) if check_integrity(seq, id)]
484
+ datapoints_seq = [(seq, id, False) for id in ids] + [
485
+ (seq, id, True) for id in ids
486
+ ]
487
+ datapoints.extend(datapoints_seq)
488
+ return datapoints
489
+
490
+ @staticmethod
491
+ def _semantics_split(sequences, data_path, img_ids):
492
+ datapoints = []
493
+ for seq in sorted(sequences):
494
+ datapoints_seq = [(seq, id, False) for id in range(len(img_ids[seq]))]
495
+ datapoints_seq = [
496
+ dp
497
+ for dp in datapoints_seq
498
+ if os.path.exists(
499
+ os.path.join(
500
+ data_path,
501
+ "data_2d_semantics",
502
+ "train",
503
+ seq,
504
+ "image_00",
505
+ "semantic_rgb",
506
+ f"{img_ids[seq][dp[1]]:010d}.png",
507
+ )
508
+ )
509
+ ]
510
+ datapoints.extend(datapoints_seq)
511
+ return datapoints
512
+
513
+ @staticmethod
514
+ def _load_split(split_path, img_ids):
515
+ img_id2id = {
516
+ seq: {id: i for i, id in enumerate(ids)} for seq, ids in img_ids.items()
517
+ }
518
+
519
+ with open(split_path, "r") as f:
520
+ lines = f.readlines()
521
+
522
+ def split_line(l):
523
+ segments = l.split(" ")
524
+ seq = segments[0]
525
+ id = img_id2id[seq][int(segments[1])]
526
+ return seq, id, segments[2][0] == "r"
527
+
528
+ return list(map(split_line, lines))
529
+
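The split files referenced here are plain text, one datapoint per line; a sketch of the parsing above, using an entry from the seg split shipped with this commit:

    # "<sequence> <image id> <l|r>"
    # "2013_05_28_drive_0000_sync 0000000386 l"
    # -> ("2013_05_28_drive_0000_sync", <index of image 386 within the sequence>, False)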
530
+ @staticmethod
531
+ def _load_calibs(data_path, fisheye_rotation=0):
532
+ data_path = Path(data_path)
533
+
534
+ calib_folder = data_path / "calibration"
535
+ cam_to_pose_file = calib_folder / "calib_cam_to_pose.txt"
536
+ cam_to_velo_file = calib_folder / "calib_cam_to_velo.txt"
537
+ intrinsics_file = calib_folder / "perspective.txt"
538
+ fisheye_02_file = calib_folder / "image_02.yaml"
539
+ fisheye_03_file = calib_folder / "image_03.yaml"
540
+
541
+ cam_to_pose_data = {}
542
+ with open(cam_to_pose_file, "r") as f:
543
+ for line in f.readlines():
544
+ key, value = line.split(":", 1)
545
+ try:
546
+ cam_to_pose_data[key] = np.array(
547
+ [float(x) for x in value.split()], dtype=np.float32
548
+ )
549
+ except ValueError:
550
+ pass
551
+
552
+ cam_to_velo_data = None
553
+ with open(cam_to_velo_file, "r") as f:
554
+ line = f.readline()
555
+ try:
556
+ cam_to_velo_data = np.array(
557
+ [float(x) for x in line.split()], dtype=np.float32
558
+ )
559
+ except ValueError:
560
+ pass
561
+
562
+ intrinsics_data = {}
563
+ with open(intrinsics_file, "r") as f:
564
+ for line in f.readlines():
565
+ key, value = line.split(":", 1)
566
+ try:
567
+ intrinsics_data[key] = np.array(
568
+ [float(x) for x in value.split()], dtype=np.float32
569
+ )
570
+ except ValueError:
571
+ pass
572
+
573
+ with open(fisheye_02_file, "r") as f:
574
+ f.readline() # Skips first line that defines the YAML version
575
+ fisheye_02_data = yaml.safe_load(f)
576
+
577
+ with open(fisheye_03_file, "r") as f:
578
+ f.readline() # Skips first line that defines the YAML version
579
+ fisheye_03_data = yaml.safe_load(f)
580
+
581
+ im_size_rect = (
582
+ int(intrinsics_data["S_rect_00"][1]),
583
+ int(intrinsics_data["S_rect_00"][0]),
584
+ )
585
+ im_size_fish = (fisheye_02_data["image_height"], fisheye_02_data["image_width"])
586
+
587
+ # Projection matrices
588
+ # We use these projection matrices also when resampling the fisheye cameras.
589
+ # This makes downstream processing easier, but it could be done differently.
590
+ P_rect_00 = np.reshape(intrinsics_data["P_rect_00"], (3, 4))
591
+ P_rect_01 = np.reshape(intrinsics_data["P_rect_01"], (3, 4))
592
+
593
+ # Rotation matrices from raw to rectified -> Needs to be inverted later
594
+ R_rect_00 = np.eye(4, dtype=np.float32)
595
+ R_rect_01 = np.eye(4, dtype=np.float32)
596
+ R_rect_00[:3, :3] = np.reshape(intrinsics_data["R_rect_00"], (3, 3))
597
+ R_rect_01[:3, :3] = np.reshape(intrinsics_data["R_rect_01"], (3, 3))
598
+
599
+ # Rotation matrices from resampled fisheye to raw fisheye
600
+ fisheye_rotation = np.array(fisheye_rotation).reshape((1, 2))
601
+ R_02 = np.eye(4, dtype=np.float32)
602
+ R_03 = np.eye(4, dtype=np.float32)
603
+ R_02[:3, :3] = (
604
+ Rotation.from_euler("xy", fisheye_rotation[:, [1, 0]], degrees=True)
605
+ .as_matrix()
606
+ .astype(np.float32)
607
+ )
608
+ R_03[:3, :3] = (
609
+ Rotation.from_euler(
610
+ "xy", fisheye_rotation[:, [1, 0]] * np.array([[1, -1]]), degrees=True
611
+ )
612
+ .as_matrix()
613
+ .astype(np.float32)
614
+ )
615
+
616
+ # Load cam to pose transforms
617
+ T_00_to_pose = np.eye(4, dtype=np.float32)
618
+ T_01_to_pose = np.eye(4, dtype=np.float32)
619
+ T_02_to_pose = np.eye(4, dtype=np.float32)
620
+ T_03_to_pose = np.eye(4, dtype=np.float32)
621
+ T_00_to_velo = np.eye(4, dtype=np.float32)
622
+
623
+ T_00_to_pose[:3, :] = np.reshape(cam_to_pose_data["image_00"], (3, 4))
624
+ T_01_to_pose[:3, :] = np.reshape(cam_to_pose_data["image_01"], (3, 4))
625
+ T_02_to_pose[:3, :] = np.reshape(cam_to_pose_data["image_02"], (3, 4))
626
+ T_03_to_pose[:3, :] = np.reshape(cam_to_pose_data["image_03"], (3, 4))
627
+ T_00_to_velo[:3, :] = np.reshape(cam_to_velo_data, (3, 4))
628
+
629
+ # Compute cam to pose transforms for rectified perspective cameras
630
+ T_rect_00_to_pose = T_00_to_pose @ np.linalg.inv(R_rect_00)
631
+ T_rect_01_to_pose = T_01_to_pose @ np.linalg.inv(R_rect_01)
632
+
633
+ # Compute cam to pose transform for fisheye cameras
634
+ T_02_to_pose = T_02_to_pose @ R_02
635
+ T_03_to_pose = T_03_to_pose @ R_03
636
+
637
+ # Compute velo to cameras and velo to pose transforms
638
+ T_velo_to_rect_00 = R_rect_00 @ np.linalg.inv(T_00_to_velo)
639
+ T_velo_to_pose = T_rect_00_to_pose @ T_velo_to_rect_00
640
+ T_velo_to_rect_01 = np.linalg.inv(T_rect_01_to_pose) @ T_velo_to_pose
641
+
642
+ # Calibration matrix is the same for both perspective cameras
643
+ K = P_rect_00[:3, :3]
644
+
645
+ # Normalize calibration
646
+ f_x = K[0, 0] / im_size_rect[1]
647
+ f_y = K[1, 1] / im_size_rect[0]
648
+ c_x = K[0, 2] / im_size_rect[1]
649
+ c_y = K[1, 2] / im_size_rect[0]
650
+
651
+ # Change to image coordinates [-1, 1]
652
+ K[0, 0] = f_x * 2.0
653
+ K[1, 1] = f_y * 2.0
654
+ K[0, 2] = c_x * 2.0 - 1
655
+ K[1, 2] = c_y * 2.0 - 1
656
+
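A note on the convention established here (sketch, not part of the committed file): a pixel (u, v) in a W x H image is mapped to

    x = 2 * (u / W) - 1
    y = 2 * (v / H) - 1

so the normalized K projects camera-space points directly into [-1, 1]^2; load_depth further below inverts this with (x * 0.5 + 0.5) * W when splatting LiDAR points onto the pixel grid.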
657
+ # Convert fisheye calibration to [-1, 1] image dimensions
658
+ fisheye_02_data["projection_parameters"]["gamma1"] = (
659
+ fisheye_02_data["projection_parameters"]["gamma1"] / im_size_fish[1]
660
+ ) * 2.0
661
+ fisheye_02_data["projection_parameters"]["gamma2"] = (
662
+ fisheye_02_data["projection_parameters"]["gamma2"] / im_size_fish[0]
663
+ ) * 2.0
664
+ fisheye_02_data["projection_parameters"]["u0"] = (
665
+ fisheye_02_data["projection_parameters"]["u0"] / im_size_fish[1]
666
+ ) * 2.0 - 1.0
667
+ fisheye_02_data["projection_parameters"]["v0"] = (
668
+ fisheye_02_data["projection_parameters"]["v0"] / im_size_fish[0]
669
+ ) * 2.0 - 1.0
670
+
671
+ fisheye_03_data["projection_parameters"]["gamma1"] = (
672
+ fisheye_03_data["projection_parameters"]["gamma1"] / im_size_fish[1]
673
+ ) * 2.0
674
+ fisheye_03_data["projection_parameters"]["gamma2"] = (
675
+ fisheye_03_data["projection_parameters"]["gamma2"] / im_size_fish[0]
676
+ ) * 2.0
677
+ fisheye_03_data["projection_parameters"]["u0"] = (
678
+ fisheye_03_data["projection_parameters"]["u0"] / im_size_fish[1]
679
+ ) * 2.0 - 1.0
680
+ fisheye_03_data["projection_parameters"]["v0"] = (
681
+ fisheye_03_data["projection_parameters"]["v0"] / im_size_fish[0]
682
+ ) * 2.0 - 1.0
683
+
684
+ # Use same camera calibration as perspective cameras for resampling
685
+ # K_fisheye = np.eye(3, dtype=np.float32)
686
+ # K_fisheye[0, 0] = 2
687
+ # K_fisheye[1, 1] = 2
688
+
689
+ K_fisheye = K
690
+
691
+ calibs = {
692
+ "K_perspective": K,
693
+ "K_fisheye": K_fisheye,
694
+ "T_cam_to_pose": {
695
+ "00": T_rect_00_to_pose,
696
+ "01": T_rect_01_to_pose,
697
+ "02": T_02_to_pose,
698
+ "03": T_03_to_pose,
699
+ },
700
+ "T_velo_to_cam": {
701
+ "00": T_velo_to_rect_00,
702
+ "01": T_velo_to_rect_01,
703
+ },
704
+ "T_velo_to_pose": T_velo_to_pose,
705
+ "fisheye": {
706
+ "calib_02": fisheye_02_data,
707
+ "calib_03": fisheye_03_data,
708
+ "R_02": R_02[:3, :3],
709
+ "R_03": R_03[:3, :3],
710
+ },
711
+ "im_size": im_size_rect,
712
+ }
713
+
714
+ return calibs
715
+
716
+ @staticmethod
717
+ def _get_resamplers(calibs, K_target, target_image_size):
718
+ resampler_02 = FisheyeToPinholeSampler(
719
+ K_target,
720
+ target_image_size,
721
+ calibs["fisheye"]["calib_02"],
722
+ calibs["fisheye"]["R_02"],
723
+ )
724
+ resampler_03 = FisheyeToPinholeSampler(
725
+ K_target,
726
+ target_image_size,
727
+ calibs["fisheye"]["calib_03"],
728
+ calibs["fisheye"]["R_03"],
729
+ )
730
+
731
+ return resampler_02, resampler_03
732
+
733
+ @staticmethod
734
+ def _load_poses(pose_path, sequences):
735
+ ids = {}
736
+ poses = {}
737
+
738
+ for seq in sequences:
739
+ pose_file = Path(pose_path) / seq / f"poses.txt"
740
+
741
+ try:
742
+ pose_data = np.loadtxt(pose_file)
743
+ except FileNotFoundError:
744
+ print(f"Ground truth poses are not avaialble for sequence {seq}.")
745
+
746
+ ids_seq = pose_data[:, 0].astype(int)
747
+ poses_seq = pose_data[:, 1:].astype(np.float32).reshape((-1, 3, 4))
748
+ poses_seq = np.concatenate(
749
+ (poses_seq, np.zeros_like(poses_seq[:, :1, :])), axis=1
750
+ )
751
+ poses_seq[:, 3, 3] = 1
752
+
753
+ ids[seq] = ids_seq
754
+ poses[seq] = poses_seq
755
+ return ids, poses
756
+
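Each row of poses.txt is a frame id followed by a flattened 3x4 pose matrix, which the code above pads to 4x4. A sketch with placeholder values:

    # frame_id  r11 r12 r13 t1  r21 r22 r23 t2  r31 r32 r33 t3
    row = np.array([386., 1, 0, 0, 0.5, 0, 1, 0, 1.2, 0, 0, 1, -0.3])
    pose = np.concatenate([row[1:].reshape(3, 4), [[0, 0, 0, 1]]]).astype(np.float32)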
757
+ @staticmethod
758
+ def _load_3d_bboxes(bbox_path, sequences):
759
+ bboxes = {}
760
+
761
+ for seq in sequences:
762
+ with open(Path(bbox_path) / f"{seq}.xml", "rb") as f:
763
+ tree = ET.parse(f)
764
+ root = tree.getroot()
765
+
766
+ objects = defaultdict(list)
767
+
768
+ num_bbox = 0
769
+
770
+ for child in root:
771
+ if child.find("transform") is None:
772
+ continue
773
+ obj = KITTI360Bbox3D()
774
+ if child.find("semanticId") is not None:
775
+ obj.parseBbox(child)
776
+ else:
777
+ obj.parseStuff(child)
778
+ # globalId = local2global(obj.semanticId, obj.instanceId)
779
+ # objects[globalId][obj.timestamp] = obj
780
+ objects[obj.timestamp].append(obj)
781
+ num_bbox += 1
782
+
783
+ # globalIds = np.asarray(list(objects.keys()))
784
+ # semanticIds, instanceIds = global2local(globalIds)
785
+ # for label in labels:
786
+ # if label.hasInstances:
787
+ # print(f'{label.name:<30}:\t {(semanticIds==label.id).sum()}')
788
+ # print(f'Loaded {len(globalIds)} instances')
789
+ # print(f'Loaded {num_bbox} boxes')
790
+
791
+ bboxes[seq] = objects
792
+
793
+ return bboxes
794
+
795
+ def get_img_id_from_id(self, sequence, id):
796
+ return self._img_ids[sequence][id]
797
+
798
+ def load_images(self, seq, img_ids, load_left, load_right, img_ids_fish=None):
799
+ imgs_p_left = []
800
+ imgs_f_left = []
801
+ imgs_p_right = []
802
+ imgs_f_right = []
803
+
804
+ if img_ids_fish is None:
805
+ img_ids_fish = img_ids
806
+
807
+ for id in img_ids:
808
+ if load_left:
809
+ img_perspective = (
810
+ cv2.cvtColor(
811
+ cv2.imread(
812
+ os.path.join(
813
+ self.data_path,
814
+ "data_2d_raw",
815
+ seq,
816
+ "image_00",
817
+ self._perspective_folder,
818
+ f"{id:010d}.png",
819
+ )
820
+ ),
821
+ cv2.COLOR_BGR2RGB,
822
+ ).astype(np.float32)
823
+ / 255
824
+ )
825
+ imgs_p_left += [img_perspective]
826
+
827
+ if load_right:
828
+ img_perspective = (
829
+ cv2.cvtColor(
830
+ cv2.imread(
831
+ os.path.join(
832
+ self.data_path,
833
+ "data_2d_raw",
834
+ seq,
835
+ "image_01",
836
+ self._perspective_folder,
837
+ f"{id:010d}.png",
838
+ )
839
+ ),
840
+ cv2.COLOR_BGR2RGB,
841
+ ).astype(np.float32)
842
+ / 255
843
+ )
844
+ imgs_p_right += [img_perspective]
845
+
846
+ for id in img_ids_fish:
847
+ if load_left:
848
+ img_fisheye = (
849
+ cv2.cvtColor(
850
+ cv2.imread(
851
+ os.path.join(
852
+ self.data_path,
853
+ "data_2d_raw",
854
+ seq,
855
+ "image_02",
856
+ self._fisheye_folder,
857
+ f"{id:010d}.png",
858
+ )
859
+ ),
860
+ cv2.COLOR_BGR2RGB,
861
+ ).astype(np.float32)
862
+ / 255
863
+ )
864
+ imgs_f_left += [img_fisheye]
865
+ if load_right:
866
+ img_fisheye = (
867
+ cv2.cvtColor(
868
+ cv2.imread(
869
+ os.path.join(
870
+ self.data_path,
871
+ "data_2d_raw",
872
+ seq,
873
+ "image_03",
874
+ self._fisheye_folder,
875
+ f"{id:010d}.png",
876
+ )
877
+ ),
878
+ cv2.COLOR_BGR2RGB,
879
+ ).astype(np.float32)
880
+ / 255
881
+ )
882
+ imgs_f_right += [img_fisheye]
883
+
884
+ return imgs_p_left, imgs_f_left, imgs_p_right, imgs_f_right
885
+
886
+ def process_img(
887
+ self,
888
+ img: np.ndarray,
889
+ color_aug_fn=None,
890
+ resampler: FisheyeToPinholeSampler = None,
891
+ ):
892
+ if resampler is not None and not self.is_preprocessed:
893
+ img = torch.tensor(img).permute(2, 0, 1)
894
+ img = resampler.resample(img)
895
+ else:
896
+ if self.target_image_size:
897
+ img = cv2.resize(
898
+ img,
899
+ (self.target_image_size[1], self.target_image_size[0]),
900
+ interpolation=cv2.INTER_LINEAR,
901
+ )
902
+ img = np.transpose(img, (2, 0, 1))
903
+ img = torch.tensor(img)
904
+
905
+ if color_aug_fn is not None:
906
+ img = color_aug_fn(img)
907
+
908
+ img = img * 2 - 1
909
+ return img
910
+
911
+ def load_occ(self, seq, id, poses):
911
+ world_transform = torch.inverse(torch.tensor(np.stack(poses[:1], axis=0)))
913
+ world_transform = cam_incl_adjust @ world_transform
914
+ seq_len = self._img_ids[seq].shape[0]
915
+ # Load lidar pointclouds
916
+ points_all, velo_poses = [], []
917
+ for id in range(id, min(id + self.aggregate_timesteps, seq_len)):
918
+ points = np.fromfile(
919
+ os.path.join(
920
+ self.data_path,
921
+ "data_3d_raw",
922
+ seq,
923
+ "velodyne_points",
924
+ "data",
925
+ f"{self._img_ids[seq][id]:010d}.bin",
926
+ ),
927
+ dtype=np.float32,
928
+ ).reshape(-1, 4)
929
+ points[:, 3] = 1.0
930
+ points = torch.tensor(points)
931
+ velo_pose = (
932
+ world_transform.squeeze()
933
+ @ torch.tensor(self._poses[seq][id])
934
+ @ torch.tensor(self._calibs["T_velo_to_pose"])
935
+ )
936
+ points_all.append(points)
937
+ velo_poses.append(velo_pose)
938
+
939
+ velo_poses = torch.stack(velo_poses, dim=0)
940
+
941
+ return self.kitti_velodyn.check_occupancy(points_all, velo_poses)
942
+
943
+ def get_3d_bboxes(self, seq, img_id, pose, projs):
944
+ seq_3d_bboxes = self._3d_bboxes[seq]
945
+ pose_w2c = np.linalg.inv(pose)
946
+
947
+ def filter_bbox(bbox):
948
+ verts = bbox.vertices
949
+ verts = (projs @ (pose_w2c[:3, :3] @ verts.T + pose_w2c[:3, 3, None])).T
950
+ verts[:, :2] /= verts[:, 2:3]
951
+ valid = (
952
+ ((verts[:, 0] >= -1) & (verts[:, 0] <= 1))
953
+ & ((verts[:, 1] >= -1) & (verts[:, 1] <= 1))
954
+ & ((verts[:, 2] > 0) & (verts[:, 2] <= 80))
955
+ )
956
+ valid = np.any(valid, axis=-1)
957
+ return valid
958
+
959
+ bboxes = seq_3d_bboxes[-1] + seq_3d_bboxes[img_id]
960
+
961
+ bboxes = list(filter(filter_bbox, bboxes))
962
+
963
+ bboxes = [
964
+ {
965
+ "vertices": bbox.vertices,
966
+ "faces": bbox.faces,
967
+ "semanticId": bbox.semanticId,
968
+ "instanceId": bbox.instanceId,
969
+ }
970
+ for bbox in bboxes
971
+ ]
972
+
973
+ return bboxes
974
+
975
+ def load_segmentation(self, seq, img_id):
976
+ seg = cv2.imread(
977
+ os.path.join(
978
+ self.data_path,
979
+ "data_2d_semantics",
980
+ "train",
981
+ seq,
982
+ "image_00",
983
+ "semantic",
984
+ f"{img_id:010d}.png",
985
+ ),
986
+ cv2.IMREAD_UNCHANGED,
987
+ )
988
+ seg = cv2.resize(
989
+ seg,
990
+ (self.target_image_size[1], self.target_image_size[0]),
991
+ interpolation=cv2.INTER_NEAREST,
992
+ )
993
+ return seg
994
+
995
+ def load_depth(self, seq, img_id, is_right):
996
+ points = np.fromfile(
997
+ os.path.join(
998
+ self.data_path,
999
+ "data_3d_raw",
1000
+ seq,
1001
+ "velodyne_points",
1002
+ "data",
1003
+ f"{img_id:010d}.bin",
1004
+ ),
1005
+ dtype=np.float32,
1006
+ ).reshape(-1, 4)
1007
+ points[:, 3] = 1.0
1008
+
1009
+ T_velo_to_cam = self._calibs["T_velo_to_cam"]["00" if not is_right else "01"]
1010
+ K = self._calibs["K_perspective"]
1011
+
1012
+ # project the points to the camera
1013
+ velo_pts_im = np.dot(K @ T_velo_to_cam[:3, :], points.T).T
1014
+ velo_pts_im[:, :2] = velo_pts_im[:, :2] / velo_pts_im[:, 2][..., None]
1015
+
1016
+ # the projection is normalized to [-1, 1] -> transform to [0, height-1] x [0, width-1]
1017
+ velo_pts_im[:, 0] = np.round(
1018
+ (velo_pts_im[:, 0] * 0.5 + 0.5) * self.target_image_size[1]
1019
+ )
1020
+ velo_pts_im[:, 1] = np.round(
1021
+ (velo_pts_im[:, 1] * 0.5 + 0.5) * self.target_image_size[0]
1022
+ )
1023
+
1024
+ # check if in bounds
1025
+ val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0)
1026
+ val_inds = (
1027
+ val_inds
1028
+ & (velo_pts_im[:, 0] < self.target_image_size[1])
1029
+ & (velo_pts_im[:, 1] < self.target_image_size[0])
1030
+ )
1031
+ velo_pts_im = velo_pts_im[val_inds, :]
1032
+
1033
+ # project to image
1034
+ depth = np.zeros(self.target_image_size)
1035
+ depth[
1036
+ velo_pts_im[:, 1].astype(np.int32), velo_pts_im[:, 0].astype(np.int32)
1037
+ ] = velo_pts_im[:, 2]
1038
+
1039
+ # find the duplicate points and choose the closest depth
1040
+ inds = (
1041
+ velo_pts_im[:, 1] * (self.target_image_size[1] - 1) + velo_pts_im[:, 0] - 1
1042
+ )
1043
+ dupe_inds = [item for item, count in Counter(inds).items() if count > 1]
1044
+ for dd in dupe_inds:
1045
+ pts = np.where(inds == dd)[0]
1046
+ x_loc = int(velo_pts_im[pts[0], 0])
1047
+ y_loc = int(velo_pts_im[pts[0], 1])
1048
+ depth[y_loc, x_loc] = velo_pts_im[pts, 2].min()
1049
+ depth[depth < 0] = 0
1050
+
1051
+ return depth[None, :, :]
1052
+
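The projection pipeline above, condensed to one homogeneous LiDAR point (sketch, not part of the committed file; W, H denote the target image size):

    q = K @ T_velo_to_cam[:3, :] @ p          # p = (x, y, z, 1); q[2] is the metric depth
    u = round((q[0] / q[2] * 0.5 + 0.5) * W)  # [-1, 1] -> pixel coordinates
    v = round((q[1] / q[2] * 0.5 + 0.5) * H)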
1053
+ def __getitem__(self, index: int):
1054
+ _start_time = time.time()
1055
+
1056
+ if index >= self.length:
1057
+ raise IndexError()
1058
+
1059
+ if self._skip != 0:
1060
+ index += self._skip
1061
+
1062
+ sequence, id, is_right = self._datapoints[index]
1063
+ seq_len = self._img_ids[sequence].shape[0]
1064
+
1065
+ load_left, load_right = (
1066
+ not is_right
1067
+ ) or self.return_stereo, is_right or self.return_stereo
1068
+
1069
+ ## Randomly sample the fisheye time step so that it covers regions occluded in the stereo views
1070
+ if self.random_fisheye_offset:
1071
+ fisheye_offset = self.fisheye_offset[
1072
+ torch.randint(0, len(self.fisheye_offset), (1,)).item()
1073
+ ] ## randomly select one of the fisheye offsets given in the config
1074
+ else:
1075
+ fisheye_offset = self.fisheye_offset[-1]
1076
+
1077
+ if self.random_stereo_offset:
1078
+ stereo_offset = self.stereo_offset[
1079
+ torch.randint(0, len(self.stereo_offset), (1,)).item()
1080
+ ]
1081
+ else:
1082
+ stereo_offset = self.stereo_offset[0]
1083
+
1084
+ # ids = [id] + [max(min(i, seq_len-1), 0) for i in range(id - self._left_offset, id - self._left_offset + self.frame_count * self.dilation, self.dilation) if i != id]
1085
+ # ids_fish = [max(min(id + self.fisheye_offset, seq_len-1), 0)] + [max(min(i, seq_len-1), 0) for i in range(id + self.fisheye_offset - self._left_offset, id + self.fisheye_offset - self._left_offset + self.frame_count * self.dilation, self.dilation) if i != id + self.fisheye_offset]
1086
+ # img_ids = [self.get_img_id_from_id(sequence, id) for id in ids]
1087
+ # img_ids_fish = [self.get_img_id_from_id(sequence, id) for id in ids_fish]
1088
+
1089
+ id_st = (
1090
+ id + stereo_offset - 1
1091
+ ) ## TODO: handle offsets of 3+ steps without the -1; scenes are currently sampled stereo_offset frames apart
1092
+ ids = [id] + [
1093
+ max(min(i, seq_len - 1), 0)
1094
+ for i in range(
1095
+ id_st - self._left_offset,
1096
+ id_st - self._left_offset + self.frame_count * self.dilation,
1097
+ self.dilation,
1098
+ )
1099
+ if i != id_st
1100
+ ]
1101
+ ids_fish = [max(min(id + fisheye_offset, seq_len - 1), 0)] + [
1102
+ max(min(i, seq_len - 1), 0)
1103
+ for i in range(
1104
+ id + fisheye_offset - self._left_offset,
1105
+ id
1106
+ + fisheye_offset
1107
+ - self._left_offset
1108
+ + self.frame_count * self.dilation,
1109
+ self.dilation,
1110
+ )
1111
+ if i != id + fisheye_offset
1112
+ ]
1113
+ ## ids_fish is now shifted fisheye_offset steps ahead of ids
1114
+ img_ids = [self.get_img_id_from_id(sequence, id) for id in ids]
1115
+ img_ids_fish = [self.get_img_id_from_id(sequence, id) for id in ids_fish]
1116
+
1117
+ if not self.return_fisheye:
1118
+ ids_fish, img_ids_fish = [], []
1119
+
1120
+ if self.color_aug:
1121
+ color_aug_fn = get_color_aug_fn(
1122
+ ColorJitter.get_params(
1123
+ brightness=(0.8, 1.2),
1124
+ contrast=(0.8, 1.2),
1125
+ saturation=(0.8, 1.2),
1126
+ hue=(-0.1, 0.1),
1127
+ )
1128
+ )
1129
+ else:
1130
+ color_aug_fn = None
1131
+
1132
+ _start_time_loading = time.time()
1133
+ imgs_p_left, imgs_f_left, imgs_p_right, imgs_f_right = self.load_images(
1134
+ sequence, img_ids, load_left, load_right, img_ids_fish=img_ids_fish
1135
+ )
1136
+ _loading_time = np.array(time.time() - _start_time_loading)
1137
+
1138
+ _start_time_processing = time.time()
1139
+ imgs_p_left = [
1140
+ self.process_img(img, color_aug_fn=color_aug_fn) for img in imgs_p_left
1141
+ ]
1142
+ imgs_f_left = [
1143
+ self.process_img(
1144
+ img, color_aug_fn=color_aug_fn, resampler=self._resampler_02
1145
+ )
1146
+ for img in imgs_f_left
1147
+ ]
1148
+ imgs_p_right = [
1149
+ self.process_img(img, color_aug_fn=color_aug_fn) for img in imgs_p_right
1150
+ ]
1151
+ imgs_f_right = [
1152
+ self.process_img(
1153
+ img, color_aug_fn=color_aug_fn, resampler=self._resampler_03
1154
+ )
1155
+ for img in imgs_f_right
1156
+ ]
1157
+ _processing_time = np.array(time.time() - _start_time_processing)
1158
+
1159
+ # These poses are camera to world !!
1160
+ poses_p_left = (
1161
+ [
1162
+ self._poses[sequence][i, :, :] @ self._calibs["T_cam_to_pose"]["00"]
1163
+ for i in ids
1164
+ ]
1165
+ if load_left
1166
+ else []
1167
+ )
1168
+ poses_f_left = (
1169
+ [
1170
+ self._poses[sequence][i, :, :] @ self._calibs["T_cam_to_pose"]["02"]
1171
+ for i in ids_fish
1172
+ ]
1173
+ if load_left
1174
+ else []
1175
+ )
1176
+ poses_p_right = (
1177
+ [
1178
+ self._poses[sequence][i, :, :] @ self._calibs["T_cam_to_pose"]["01"]
1179
+ for i in ids
1180
+ ]
1181
+ if load_right
1182
+ else []
1183
+ )
1184
+ poses_f_right = (
1185
+ [
1186
+ self._poses[sequence][i, :, :] @ self._calibs["T_cam_to_pose"]["03"]
1187
+ for i in ids_fish
1188
+ ]
1189
+ if load_right
1190
+ else []
1191
+ )
1192
+
1193
+ projs_p_left = [self._calibs["K_perspective"] for _ in ids] if load_left else []
1194
+ projs_f_left = (
1195
+ [self._calibs["K_fisheye"] for _ in ids_fish] if load_left else []
1196
+ )
1197
+ projs_p_right = (
1198
+ [self._calibs["K_perspective"] for _ in ids] if load_right else []
1199
+ )
1200
+ projs_f_right = (
1201
+ [self._calibs["K_fisheye"] for _ in ids_fish] if load_right else []
1202
+ )
1203
+
1204
+ imgs = (
1205
+ imgs_p_left + imgs_p_right + imgs_f_left + imgs_f_right
1206
+ if not is_right
1207
+ else imgs_p_right + imgs_p_left + imgs_f_right + imgs_f_left
1208
+ )
1209
+ projs = (
1210
+ projs_p_left + projs_p_right + projs_f_left + projs_f_right
1211
+ if not is_right
1212
+ else projs_p_right + projs_p_left + projs_f_right + projs_f_left
1213
+ )
1214
+ poses = (
1215
+ poses_p_left + poses_p_right + poses_f_left + poses_f_right
1216
+ if not is_right
1217
+ else poses_p_right + poses_p_left + poses_f_right + poses_f_left
1218
+ )
1219
+ ids = np.array(ids + ids + ids_fish + ids_fish, dtype=np.int32)
1220
+
1221
+ if self.return_depth:
1222
+ depths = [self.load_depth(sequence, img_ids[0], is_right)]
1223
+ else:
1224
+ depths = []
1225
+
1226
+ if self.return_3d_bboxes:
1227
+ bboxes_3d = [self.get_3d_bboxes(sequence, img_ids[0], poses[0], projs[0])]
1228
+ else:
1229
+ bboxes_3d = []
1230
+
1231
+ if self.return_segmentation:
1232
+ segs = [self.load_segmentation(sequence, img_ids[0])]
1233
+ else:
1234
+ segs = []
1235
+
1236
+ if self.kitti_velodyn:
1237
+ is_occupied, is_visible = self.load_occ(sequence, id, poses)
1238
+ else:
1239
+ is_occupied, is_visible = [], []
1240
+
1241
+ _proc_time = np.array(time.time() - _start_time)
1242
+
1243
+ # print(_loading_time, _processing_time, _proc_time)
1244
+
1245
+ data = {
1246
+ "imgs": imgs,
1247
+ "projs": projs,
1248
+ "poses": poses,
1249
+ "depths": depths,
1250
+ "ts": ids,
1251
+ "3d_bboxes": bboxes_3d,
1252
+ "segs": segs,
1253
+ "is_occupied": is_occupied,
1254
+ "is_visible": is_visible,
1255
+ "t__get_item__": np.array([_proc_time]),
1256
+ "index": np.array([index]),
1257
+ }
1258
+
1259
+ return data
1260
+
1261
+ def __len__(self) -> int:
1262
+ return self.length
datasets/kitti_360/labels.py ADDED
@@ -0,0 +1,200 @@
1
+ #!/usr/bin/python
2
+ #
3
+ # KITTI-360 labels
4
+ #
5
+
6
+ from collections import namedtuple
7
+
8
+
9
+ #--------------------------------------------------------------------------------
10
+ # Definitions
11
+ #--------------------------------------------------------------------------------
12
+
13
+ # a label and all meta information
14
+ Label = namedtuple( 'Label' , [
15
+
16
+ 'name' , # The identifier of this label, e.g. 'car', 'person', ... .
17
+ # We use them to uniquely name a class
18
+
19
+ 'id' , # An integer ID that is associated with this label.
20
+ # The IDs are used to represent the label in ground truth images
21
+ # An ID of -1 means that this label does not have an ID and thus
22
+ # is ignored when creating ground truth images (e.g. license plate).
23
+ # Do not modify these IDs, since exactly these IDs are expected by the
24
+ # evaluation server.
25
+
26
+ 'kittiId' , # An integer ID that is associated with this label for KITTI-360
27
+ # NOT FOR RELEASING
28
+
29
+ 'trainId' , # Feel free to modify these IDs as suitable for your method. Then create
30
+ # ground truth images with train IDs, using the tools provided in the
31
+ # 'preparation' folder. However, make sure to validate or submit results
32
+ # to our evaluation server using the regular IDs above!
33
+ # For trainIds, multiple labels might have the same ID. Then, these labels
34
+ # are mapped to the same class in the ground truth images. For the inverse
35
+ # mapping, we use the label that is defined first in the list below.
36
+ # For example, mapping all void-type classes to the same ID in training,
37
+ # might make sense for some approaches.
38
+ # Max value is 255!
39
+
40
+ 'category' , # The name of the category that this label belongs to
41
+
42
+ 'categoryId' , # The ID of this category. Used to create ground truth images
43
+ # on category level.
44
+
45
+ 'hasInstances', # Whether this label distinguishes between single instances or not
46
+
47
+ 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored
48
+ # during evaluations or not
49
+
50
+ 'ignoreInInst', # Whether pixels having this class as ground truth label are ignored
51
+ # during evaluations of instance segmentation or not
52
+
53
+ 'color' , # The color of this label
54
+ ] )
55
+
56
+
57
+ #--------------------------------------------------------------------------------
58
+ # A list of all labels
59
+ #--------------------------------------------------------------------------------
60
+
61
+ # Please adapt the train IDs as appropriate for your approach.
62
+ # Note that you might want to ignore labels with ID 255 during training.
63
+ # Further note that the current train IDs are only a suggestion. You can use whatever you like.
64
+ # Make sure to provide your results using the original IDs and not the training IDs.
65
+ # Note that many IDs are ignored in evaluation and thus you never need to predict these!
66
+
67
+ labels = [
68
+ # name id kittiId, trainId category catId hasInstances ignoreInEval ignoreInInst color
69
+ Label( 'unlabeled' , 0 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
70
+ Label( 'ego vehicle' , 1 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
71
+ Label( 'rectification border' , 2 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
72
+ Label( 'out of roi' , 3 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
73
+ Label( 'static' , 4 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ),
74
+ Label( 'dynamic' , 5 , -1 , 255 , 'void' , 0 , False , True , True , (111, 74, 0) ),
75
+ Label( 'ground' , 6 , -1 , 255 , 'void' , 0 , False , True , True , ( 81, 0, 81) ),
76
+ Label( 'road' , 7 , 1 , 0 , 'flat' , 1 , False , False , False , (128, 64,128) ),
77
+ Label( 'sidewalk' , 8 , 3 , 1 , 'flat' , 1 , False , False , False , (244, 35,232) ),
78
+ Label( 'parking' , 9 , 2 , 255 , 'flat' , 1 , False , True , True , (250,170,160) ),
79
+ Label( 'rail track' , 10 , 10, 255 , 'flat' , 1 , False , True , True , (230,150,140) ),
80
+ Label( 'building' , 11 , 11, 2 , 'construction' , 2 , True , False , False , ( 70, 70, 70) ),
81
+ Label( 'wall' , 12 , 7 , 3 , 'construction' , 2 , False , False , False , (102,102,156) ),
82
+ Label( 'fence' , 13 , 8 , 4 , 'construction' , 2 , False , False , False , (190,153,153) ),
83
+ Label( 'guard rail' , 14 , 30, 255 , 'construction' , 2 , False , True , True , (180,165,180) ),
84
+ Label( 'bridge' , 15 , 31, 255 , 'construction' , 2 , False , True , True , (150,100,100) ),
85
+ Label( 'tunnel' , 16 , 32, 255 , 'construction' , 2 , False , True , True , (150,120, 90) ),
86
+ Label( 'pole' , 17 , 21, 5 , 'object' , 3 , True , False , True , (153,153,153) ),
87
+ Label( 'polegroup' , 18 , -1 , 255 , 'object' , 3 , False , True , True , (153,153,153) ),
88
+ Label( 'traffic light' , 19 , 23, 6 , 'object' , 3 , True , False , True , (250,170, 30) ),
89
+ Label( 'traffic sign' , 20 , 24, 7 , 'object' , 3 , True , False , True , (220,220, 0) ),
90
+ Label( 'vegetation' , 21 , 5 , 8 , 'nature' , 4 , False , False , False , (107,142, 35) ),
91
+ Label( 'terrain' , 22 , 4 , 9 , 'nature' , 4 , False , False , False , (152,251,152) ),
92
+ Label( 'sky' , 23 , 9 , 10 , 'sky' , 5 , False , False , False , ( 70,130,180) ),
93
+ Label( 'person' , 24 , 19, 11 , 'human' , 6 , True , False , False , (220, 20, 60) ),
94
+ Label( 'rider' , 25 , 20, 12 , 'human' , 6 , True , False , False , (255, 0, 0) ),
95
+ Label( 'car' , 26 , 13, 13 , 'vehicle' , 7 , True , False , False , ( 0, 0,142) ),
96
+ Label( 'truck' , 27 , 14, 14 , 'vehicle' , 7 , True , False , False , ( 0, 0, 70) ),
97
+ Label( 'bus' , 28 , 34, 15 , 'vehicle' , 7 , True , False , False , ( 0, 60,100) ),
98
+ Label( 'caravan' , 29 , 16, 255 , 'vehicle' , 7 , True , True , True , ( 0, 0, 90) ),
99
+ Label( 'trailer' , 30 , 15, 255 , 'vehicle' , 7 , True , True , True , ( 0, 0,110) ),
100
+ Label( 'train' , 31 , 33, 16 , 'vehicle' , 7 , True , False , False , ( 0, 80,100) ),
101
+ Label( 'motorcycle' , 32 , 17, 17 , 'vehicle' , 7 , True , False , False , ( 0, 0,230) ),
102
+ Label( 'bicycle' , 33 , 18, 18 , 'vehicle' , 7 , True , False , False , (119, 11, 32) ),
103
+ Label( 'garage' , 34 , 12, 2 , 'construction' , 2 , True , True , True , ( 64,128,128) ),
104
+ Label( 'gate' , 35 , 6 , 4 , 'construction' , 2 , False , True , True , (190,153,153) ),
105
+ Label( 'stop' , 36 , 29, 255 , 'construction' , 2 , True , True , True , (150,120, 90) ),
106
+ Label( 'smallpole' , 37 , 22, 5 , 'object' , 3 , True , True , True , (153,153,153) ),
107
+ Label( 'lamp' , 38 , 25, 255 , 'object' , 3 , True , True , True , (0, 64, 64) ),
108
+ Label( 'trash bin' , 39 , 26, 255 , 'object' , 3 , True , True , True , (0, 128,192) ),
109
+ Label( 'vending machine' , 40 , 27, 255 , 'object' , 3 , True , True , True , (128, 64, 0) ),
110
+ Label( 'box' , 41 , 28, 255 , 'object' , 3 , True , True , True , (64, 64,128) ),
111
+ Label( 'unknown construction' , 42 , 35, 255 , 'void' , 0 , False , True , True , (102, 0, 0) ),
112
+ Label( 'unknown vehicle' , 43 , 36, 255 , 'void' , 0 , False , True , True , ( 51, 0, 51) ),
113
+ Label( 'unknown object' , 44 , 37, 255 , 'void' , 0 , False , True , True , ( 32, 32, 32) ),
114
+ Label( 'license plate' , -1 , -1, -1 , 'vehicle' , 7 , False , True , True , ( 0, 0,142) ),
115
+ ]
116
+
117
+ #--------------------------------------------------------------------------------
118
+ # Create dictionaries for a fast lookup
119
+ #--------------------------------------------------------------------------------
120
+
121
+ # Please refer to the main method below for example usages!
122
+
123
+ # name to label object
124
+ name2label = { label.name : label for label in labels }
125
+ # id to label object
126
+ id2label = { label.id : label for label in labels }
127
+ # trainId to label object
128
+ trainId2label = { label.trainId : label for label in reversed(labels) }
129
+ # KITTI-360 ID to cityscapes ID
130
+ kittiId2label = { label.kittiId : label for label in labels }
131
+ # category to list of label objects
132
+ category2labels = {}
133
+ for label in labels:
134
+ category = label.category
135
+ if category in category2labels:
136
+ category2labels[category].append(label)
137
+ else:
138
+ category2labels[category] = [label]
139
+
140
+ #--------------------------------------------------------------------------------
141
+ # Assure single instance name
142
+ #--------------------------------------------------------------------------------
143
+
144
+ # returns the label name that describes a single instance (if possible)
145
+ # e.g. input | output
146
+ # ----------------------
147
+ # car | car
148
+ # cargroup | car
149
+ # foo | None
150
+ # foogroup | None
151
+ # skygroup | None
152
+ def assureSingleInstanceName( name ):
153
+ # if the name is known, it is not a group
154
+ if name in name2label:
155
+ return name
156
+ # test if the name actually denotes a group
157
+ if not name.endswith("group"):
158
+ return None
159
+ # remove group
160
+ name = name[:-len("group")]
161
+ # test if the new name exists
162
+ if not name in name2label:
163
+ return None
164
+ # test if the new name denotes a label that actually has instances
165
+ if not name2label[name].hasInstances:
166
+ return None
167
+ # all good then
168
+ return name
169
+
170
+ #--------------------------------------------------------------------------------
171
+ # Main for testing
172
+ #--------------------------------------------------------------------------------
173
+
174
+ # just a dummy main
175
+ if __name__ == "__main__":
176
+ # Print all the labels
177
+ print("List of KITTI-360 labels:")
178
+ print("")
179
+ print(" {:>21} | {:>3} | {:>7} | {:>14} | {:>10} | {:>12} | {:>12}".format( 'name', 'id', 'trainId', 'category', 'categoryId', 'hasInstances', 'ignoreInEval' ))
180
+ print(" " + ('-' * 98))
181
+ for label in labels:
182
+ # print(" {:>21} | {:>3} | {:>7} | {:>14} | {:>10} | {:>12} | {:>12}".format( label.name, label.id, label.trainId, label.category, label.categoryId, label.hasInstances, label.ignoreInEval ))
183
+ print(" \"{:}\"".format(label.name))
184
+ print("")
185
+
186
+ print("Example usages:")
187
+
188
+ # Map from name to label
189
+ name = 'car'
190
+ id = name2label[name].id
191
+ print("ID of label '{name}': {id}".format( name=name, id=id ))
192
+
193
+ # Map from ID to label
194
+ category = id2label[id].category
195
+ print("Category of label with ID '{id}': {category}".format( id=id, category=category ))
196
+
197
+ # Map from trainID to label
198
+ trainId = 0
199
+ name = trainId2label[trainId].name
200
+ print("Name of label with trainID '{id}': {name}".format( id=trainId, name=name ))
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0000_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0002_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0003_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0004_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0005_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0006_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0007_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0009_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/orb_slam_poses/2013_05_28_drive_0010_sync/poses.txt ADDED
The diff for this file is too large to render. See raw diff
 
datasets/kitti_360/preprocess_kitti_360.py ADDED
@@ -0,0 +1,81 @@
1
+ import argparse
2
+ import sys
3
+ sys.path.append(".")
4
+
5
+ from pathlib import Path
6
+ import subprocess
7
+
8
+ import cv2
9
+ import numpy as np
10
+ from tqdm import tqdm
11
+
12
+ from datasets.kitti_360.kitti_360_dataset import Kitti360Dataset
13
+
14
+ DRY_RUN = False
15
+
16
+
17
+ def main():
18
+ parser = argparse.ArgumentParser("KITTI 360 Preprocessing")
19
+ parser.add_argument("--data_path", "-d", type=str)
20
+ parser.add_argument("--resolution", "-r", default=(192, 640))
21
+ parser.add_argument("--fisheye_rotation", "-f", default=(0, -15))
22
+ parser.add_argument("--only_fisheye", "-o", action="store_true")
23
+
24
+ args = parser.parse_args()
25
+
26
+ data_path = Path(args.data_path)
27
+ resolution = args.resolution
28
+ rotation = args.fisheye_rotation
29
+ only_fisheye = args.only_fisheye
30
+
31
+ print("Setting up dataset")
32
+ dataset = Kitti360Dataset(
33
+ data_path=data_path,
34
+ pose_path=data_path / "data_poses",
35
+ split_path=None,
36
+ return_stereo=True,
37
+ frame_count=1,
38
+ fisheye_rotation=rotation,
39
+ color_aug=False,
40
+ return_segmentation=False,
41
+ )
42
+
43
+ print("Setting up folders...")
44
+
45
+ for i in tqdm(range(len(dataset))):
46
+ sequence, id, is_right = dataset._datapoints[i]
47
+
48
+ if is_right:
49
+ continue
50
+
51
+ image_00 = data_path / "data_2d_raw" / sequence / "image_00" / f"data_{resolution[0]}x{resolution[1]}"
52
+ image_01 = data_path / "data_2d_raw" / sequence / "image_01" / f"data_{resolution[0]}x{resolution[1]}"
53
+ image_02 = data_path / "data_2d_raw" / sequence / "image_02" / f"data_{resolution[0]}x{resolution[1]}_{rotation[0]}x{rotation[1]}"
54
+ image_03 = data_path / "data_2d_raw" / sequence / "image_03" / f"data_{resolution[0]}x{resolution[1]}_{rotation[0]}x{rotation[1]}"
55
+
56
+ img_id = dataset._img_ids[sequence][id]
57
+
58
+ if (image_00 / f"{img_id:010d}.png").exists():
59
+ continue
60
+
61
+ data = dataset[i]
62
+
63
+ image_00.mkdir(exist_ok=True, parents=True)
64
+ image_01.mkdir(exist_ok=True, parents=True)
65
+ image_02.mkdir(exist_ok=True, parents=True)
66
+ image_03.mkdir(exist_ok=True, parents=True)
67
+
68
+ img_00 = (np.transpose(data["imgs"][0].numpy(), (1, 2, 0)) * .5 + .5) * 255.
69
+ img_01 = (np.transpose(data["imgs"][1].numpy(), (1, 2, 0)) * .5 + .5) * 255.
70
+ img_02 = (np.transpose(data["imgs"][2].numpy(), (1, 2, 0)) * .5 + .5) * 255.
71
+ img_03 = (np.transpose(data["imgs"][3].numpy(), (1, 2, 0)) * .5 + .5) * 255.
72
+
73
+ if not only_fisheye:
74
+ cv2.imwrite(str(image_00 / f"{img_id:010d}.png"), cv2.cvtColor(img_00, cv2.COLOR_RGB2BGR))
75
+ cv2.imwrite(str(image_01 / f"{img_id:010d}.png"), cv2.cvtColor(img_01, cv2.COLOR_RGB2BGR))
76
+ cv2.imwrite(str(image_02 / f"{img_id:010d}.png"), cv2.cvtColor(img_02, cv2.COLOR_RGB2BGR))
77
+ cv2.imwrite(str(image_03 / f"{img_id:010d}.png"), cv2.cvtColor(img_03, cv2.COLOR_RGB2BGR))
78
+
79
+
80
+ if __name__ == "__main__":
81
+ main()
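Example invocation (the data path is a placeholder); with the defaults above this writes 192x640 resampled images next to the raw data under data_2d_raw:

    python datasets/kitti_360/preprocess_kitti_360.py --data_path /path/to/KITTI-360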
datasets/kitti_360/splits/seg/test_files.txt ADDED
@@ -0,0 +1,446 @@
1
+ 2013_05_28_drive_0000_sync 0000000386 l
2
+ 2013_05_28_drive_0000_sync 0000000406 l
3
+ 2013_05_28_drive_0000_sync 0000000426 l
4
+ 2013_05_28_drive_0000_sync 0000000446 l
5
+ 2013_05_28_drive_0000_sync 0000000466 l
6
+ 2013_05_28_drive_0000_sync 0000000486 l
7
+ 2013_05_28_drive_0000_sync 0000000506 l
8
+ 2013_05_28_drive_0000_sync 0000000546 l
9
+ 2013_05_28_drive_0000_sync 0000000606 l
10
+ 2013_05_28_drive_0000_sync 0000000626 l
11
+ 2013_05_28_drive_0000_sync 0000000646 l
12
+ 2013_05_28_drive_0000_sync 0000000666 l
13
+ 2013_05_28_drive_0000_sync 0000000706 l
14
+ 2013_05_28_drive_0000_sync 0000000726 l
15
+ 2013_05_28_drive_0000_sync 0000000746 l
16
+ 2013_05_28_drive_0000_sync 0000000786 l
17
+ 2013_05_28_drive_0000_sync 0000000806 l
18
+ 2013_05_28_drive_0000_sync 0000000826 l
19
+ 2013_05_28_drive_0000_sync 0000000846 l
20
+ 2013_05_28_drive_0000_sync 0000000866 l
21
+ 2013_05_28_drive_0000_sync 0000000886 l
22
+ 2013_05_28_drive_0000_sync 0000000906 l
23
+ 2013_05_28_drive_0000_sync 0000000926 l
24
+ 2013_05_28_drive_0000_sync 0000000946 l
25
+ 2013_05_28_drive_0000_sync 0000000966 l
26
+ 2013_05_28_drive_0000_sync 0000000986 l
27
+ 2013_05_28_drive_0000_sync 0000001054 l
28
+ 2013_05_28_drive_0000_sync 0000001111 l
29
+ 2013_05_28_drive_0000_sync 0000001138 l
30
+ 2013_05_28_drive_0000_sync 0000001158 l
31
+ 2013_05_28_drive_0000_sync 0000001178 l
32
+ 2013_05_28_drive_0000_sync 0000001198 l
33
+ 2013_05_28_drive_0000_sync 0000001218 l
34
+ 2013_05_28_drive_0000_sync 0000001238 l
35
+ 2013_05_28_drive_0000_sync 0000001258 l
36
+ 2013_05_28_drive_0000_sync 0000001278 l
37
+ 2013_05_28_drive_0000_sync 0000001298 l
38
+ 2013_05_28_drive_0000_sync 0000001338 l
39
+ 2013_05_28_drive_0000_sync 0000001358 l
40
+ 2013_05_28_drive_0000_sync 0000001378 l
41
+ 2013_05_28_drive_0000_sync 0000001398 l
42
+ 2013_05_28_drive_0000_sync 0000001418 l
43
+ 2013_05_28_drive_0000_sync 0000001438 l
44
+ 2013_05_28_drive_0000_sync 0000001458 l
45
+ 2013_05_28_drive_0000_sync 0000001478 l
46
+ 2013_05_28_drive_0000_sync 0000001498 l
47
+ 2013_05_28_drive_0000_sync 0000001518 l
48
+ 2013_05_28_drive_0000_sync 0000001538 l
49
+ 2013_05_28_drive_0000_sync 0000001558 l
50
+ 2013_05_28_drive_0000_sync 0000001578 l
51
+ 2013_05_28_drive_0000_sync 0000001598 l
52
+ 2013_05_28_drive_0000_sync 0000001618 l
53
+ 2013_05_28_drive_0000_sync 0000001638 l
54
+ 2013_05_28_drive_0000_sync 0000001658 l
55
+ 2013_05_28_drive_0000_sync 0000001678 l
56
+ 2013_05_28_drive_0000_sync 0000001698 l
57
+ 2013_05_28_drive_0000_sync 0000001718 l
58
+ 2013_05_28_drive_0000_sync 0000001738 l
59
+ 2013_05_28_drive_0000_sync 0000001758 l
60
+ 2013_05_28_drive_0000_sync 0000001778 l
61
+ 2013_05_28_drive_0000_sync 0000001798 l
62
+ 2013_05_28_drive_0000_sync 0000001818 l
63
+ 2013_05_28_drive_0000_sync 0000001858 l
64
+ 2013_05_28_drive_0000_sync 0000001878 l
65
+ 2013_05_28_drive_0000_sync 0000001898 l
66
+ 2013_05_28_drive_0000_sync 0000001918 l
67
+ 2013_05_28_drive_0000_sync 0000001938 l
68
+ 2013_05_28_drive_0000_sync 0000001958 l
69
+ 2013_05_28_drive_0000_sync 0000001978 l
70
+ 2013_05_28_drive_0000_sync 0000002725 l
71
+ 2013_05_28_drive_0000_sync 0000002745 l
72
+ 2013_05_28_drive_0000_sync 0000002765 l
73
+ 2013_05_28_drive_0000_sync 0000002785 l
74
+ 2013_05_28_drive_0000_sync 0000002805 l
75
+ 2013_05_28_drive_0000_sync 0000002825 l
76
+ 2013_05_28_drive_0000_sync 0000002845 l
77
+ 2013_05_28_drive_0000_sync 0000002865 l
78
+ 2013_05_28_drive_0000_sync 0000002885 l
79
+ 2013_05_28_drive_0000_sync 0000002905 l
80
+ 2013_05_28_drive_0000_sync 0000003266 l
81
+ 2013_05_28_drive_0000_sync 0000003286 l
82
+ 2013_05_28_drive_0000_sync 0000003306 l
83
+ 2013_05_28_drive_0000_sync 0000003326 l
84
+ 2013_05_28_drive_0000_sync 0000003346 l
85
+ 2013_05_28_drive_0000_sync 0000003366 l
86
+ 2013_05_28_drive_0000_sync 0000003386 l
87
+ 2013_05_28_drive_0000_sync 0000003406 l
88
+ 2013_05_28_drive_0000_sync 0000003426 l
89
+ 2013_05_28_drive_0000_sync 0000003446 l
90
+ 2013_05_28_drive_0000_sync 0000003466 l
91
+ 2013_05_28_drive_0000_sync 0000003486 l
92
+ 2013_05_28_drive_0000_sync 0000003506 l
93
+ 2013_05_28_drive_0000_sync 0000003526 l
94
+ 2013_05_28_drive_0000_sync 0000003546 l
95
+ 2013_05_28_drive_0000_sync 0000003566 l
96
+ 2013_05_28_drive_0000_sync 0000003586 l
97
+ 2013_05_28_drive_0000_sync 0000003606 l
98
+ 2013_05_28_drive_0000_sync 0000003626 l
99
+ 2013_05_28_drive_0000_sync 0000003666 l
100
+ 2013_05_28_drive_0000_sync 0000003686 l
101
+ 2013_05_28_drive_0000_sync 0000003706 l
102
+ 2013_05_28_drive_0000_sync 0000003726 l
103
+ 2013_05_28_drive_0000_sync 0000003746 l
104
+ 2013_05_28_drive_0000_sync 0000003766 l
105
+ 2013_05_28_drive_0000_sync 0000003786 l
106
+ 2013_05_28_drive_0000_sync 0000003806 l
107
+ 2013_05_28_drive_0000_sync 0000003826 l
108
+ 2013_05_28_drive_0000_sync 0000003846 l
109
+ 2013_05_28_drive_0000_sync 0000003886 l
110
+ 2013_05_28_drive_0000_sync 0000003906 l
111
+ 2013_05_28_drive_0002_sync 0000004618 l
112
+ 2013_05_28_drive_0002_sync 0000004638 l
113
+ 2013_05_28_drive_0002_sync 0000004658 l
114
+ 2013_05_28_drive_0002_sync 0000004678 l
115
+ 2013_05_28_drive_0002_sync 0000004698 l
116
+ 2013_05_28_drive_0002_sync 0000004718 l
117
+ 2013_05_28_drive_0002_sync 0000004738 l
118
+ 2013_05_28_drive_0002_sync 0000004758 l
119
+ 2013_05_28_drive_0002_sync 0000004778 l
120
+ 2013_05_28_drive_0002_sync 0000004798 l
121
+ 2013_05_28_drive_0002_sync 0000004818 l
122
+ 2013_05_28_drive_0002_sync 0000004838 l
123
+ 2013_05_28_drive_0002_sync 0000004858 l
124
+ 2013_05_28_drive_0002_sync 0000004878 l
125
+ 2013_05_28_drive_0002_sync 0000004898 l
126
+ 2013_05_28_drive_0002_sync 0000004918 l
127
+ 2013_05_28_drive_0002_sync 0000004938 l
128
+ 2013_05_28_drive_0002_sync 0000004958 l
129
+ 2013_05_28_drive_0002_sync 0000004978 l
130
+ 2013_05_28_drive_0002_sync 0000005006 l
131
+ 2013_05_28_drive_0002_sync 0000005086 l
132
+ 2013_05_28_drive_0002_sync 0000005106 l
133
+ 2013_05_28_drive_0002_sync 0000005126 l
134
+ 2013_05_28_drive_0002_sync 0000005146 l
135
+ 2013_05_28_drive_0002_sync 0000005166 l
136
+ 2013_05_28_drive_0002_sync 0000005186 l
137
+ 2013_05_28_drive_0002_sync 0000005206 l
138
+ 2013_05_28_drive_0002_sync 0000005226 l
139
+ 2013_05_28_drive_0002_sync 0000005246 l
140
+ 2013_05_28_drive_0002_sync 0000005266 l
141
+ 2013_05_28_drive_0002_sync 0000005286 l
142
+ 2013_05_28_drive_0002_sync 0000005306 l
143
+ 2013_05_28_drive_0002_sync 0000005326 l
144
+ 2013_05_28_drive_0002_sync 0000005366 l
145
+ 2013_05_28_drive_0002_sync 0000005406 l
146
+ 2013_05_28_drive_0002_sync 0000005426 l
147
+ 2013_05_28_drive_0002_sync 0000005446 l
148
+ 2013_05_28_drive_0002_sync 0000005466 l
149
+ 2013_05_28_drive_0002_sync 0000005486 l
150
+ 2013_05_28_drive_0002_sync 0000005506 l
151
+ 2013_05_28_drive_0002_sync 0000005526 l
152
+ 2013_05_28_drive_0002_sync 0000005546 l
153
+ 2013_05_28_drive_0002_sync 0000005566 l
154
+ 2013_05_28_drive_0002_sync 0000005586 l
155
+ 2013_05_28_drive_0002_sync 0000005661 l
156
+ 2013_05_28_drive_0002_sync 0000005782 l
157
+ 2013_05_28_drive_0002_sync 0000005802 l
158
+ 2013_05_28_drive_0002_sync 0000005822 l
159
+ 2013_05_28_drive_0002_sync 0000006002 l
160
+ 2013_05_28_drive_0002_sync 0000006062 l
161
+ 2013_05_28_drive_0002_sync 0000006082 l
162
+ 2013_05_28_drive_0002_sync 0000006102 l
163
+ 2013_05_28_drive_0002_sync 0000006122 l
164
+ 2013_05_28_drive_0002_sync 0000006222 l
165
+ 2013_05_28_drive_0002_sync 0000006242 l
166
+ 2013_05_28_drive_0002_sync 0000006262 l
167
+ 2013_05_28_drive_0002_sync 0000006282 l
168
+ 2013_05_28_drive_0002_sync 0000006362 l
169
+ 2013_05_28_drive_0002_sync 0000006382 l
170
+ 2013_05_28_drive_0002_sync 0000015219 l
171
+ 2013_05_28_drive_0002_sync 0000015239 l
172
+ 2013_05_28_drive_0002_sync 0000015259 l
173
+ 2013_05_28_drive_0002_sync 0000015319 l
174
+ 2013_05_28_drive_0003_sync 0000000182 l
175
+ 2013_05_28_drive_0003_sync 0000000262 l
176
+ 2013_05_28_drive_0004_sync 0000002922 l
177
+ 2013_05_28_drive_0004_sync 0000003002 l
178
+ 2013_05_28_drive_0004_sync 0000003022 l
179
+ 2013_05_28_drive_0004_sync 0000003142 l
180
+ 2013_05_28_drive_0004_sync 0000003162 l
181
+ 2013_05_28_drive_0004_sync 0000003182 l
182
+ 2013_05_28_drive_0004_sync 0000003202 l
183
+ 2013_05_28_drive_0004_sync 0000003222 l
184
+ 2013_05_28_drive_0004_sync 0000003242 l
185
+ 2013_05_28_drive_0004_sync 0000003262 l
186
+ 2013_05_28_drive_0004_sync 0000003282 l
187
+ 2013_05_28_drive_0004_sync 0000003302 l
188
+ 2013_05_28_drive_0004_sync 0000003322 l
189
+ 2013_05_28_drive_0004_sync 0000003342 l
190
+ 2013_05_28_drive_0004_sync 0000003362 l
191
+ 2013_05_28_drive_0004_sync 0000003382 l
192
+ 2013_05_28_drive_0004_sync 0000003402 l
193
+ 2013_05_28_drive_0004_sync 0000003422 l
194
+ 2013_05_28_drive_0004_sync 0000003442 l
195
+ 2013_05_28_drive_0004_sync 0000003462 l
196
+ 2013_05_28_drive_0004_sync 0000003542 l
197
+ 2013_05_28_drive_0004_sync 0000003562 l
198
+ 2013_05_28_drive_0004_sync 0000003582 l
199
+ 2013_05_28_drive_0004_sync 0000003602 l
200
+ 2013_05_28_drive_0004_sync 0000003622 l
201
+ 2013_05_28_drive_0004_sync 0000003642 l
202
+ 2013_05_28_drive_0004_sync 0000003662 l
203
+ 2013_05_28_drive_0004_sync 0000003682 l
204
+ 2013_05_28_drive_0004_sync 0000003797 l
205
+ 2013_05_28_drive_0004_sync 0000003825 l
206
+ 2013_05_28_drive_0004_sync 0000003845 l
207
+ 2013_05_28_drive_0004_sync 0000003865 l
208
+ 2013_05_28_drive_0004_sync 0000003885 l
209
+ 2013_05_28_drive_0004_sync 0000003905 l
210
+ 2013_05_28_drive_0004_sync 0000003925 l
211
+ 2013_05_28_drive_0004_sync 0000003945 l
212
+ 2013_05_28_drive_0004_sync 0000003965 l
213
+ 2013_05_28_drive_0004_sync 0000004399 l
214
+ 2013_05_28_drive_0004_sync 0000004439 l
215
+ 2013_05_28_drive_0004_sync 0000004459 l
216
+ 2013_05_28_drive_0004_sync 0000004479 l
217
+ 2013_05_28_drive_0004_sync 0000004499 l
218
+ 2013_05_28_drive_0004_sync 0000004539 l
219
+ 2013_05_28_drive_0004_sync 0000004559 l
220
+ 2013_05_28_drive_0004_sync 0000004596 l
221
+ 2013_05_28_drive_0004_sync 0000004616 l
222
+ 2013_05_28_drive_0004_sync 0000004636 l
223
+ 2013_05_28_drive_0004_sync 0000004656 l
224
+ 2013_05_28_drive_0004_sync 0000004696 l
225
+ 2013_05_28_drive_0004_sync 0000004717 l
226
+ 2013_05_28_drive_0004_sync 0000004737 l
227
+ 2013_05_28_drive_0004_sync 0000004897 l
228
+ 2013_05_28_drive_0004_sync 0000004917 l
229
+ 2013_05_28_drive_0005_sync 0000004806 l
230
+ 2013_05_28_drive_0005_sync 0000004826 l
231
+ 2013_05_28_drive_0005_sync 0000004846 l
232
+ 2013_05_28_drive_0005_sync 0000004866 l
233
+ 2013_05_28_drive_0005_sync 0000004886 l
234
+ 2013_05_28_drive_0005_sync 0000004906 l
235
+ 2013_05_28_drive_0005_sync 0000004926 l
236
+ 2013_05_28_drive_0005_sync 0000004946 l
237
+ 2013_05_28_drive_0005_sync 0000004986 l
238
+ 2013_05_28_drive_0005_sync 0000005006 l
239
+ 2013_05_28_drive_0005_sync 0000005026 l
240
+ 2013_05_28_drive_0005_sync 0000005046 l
241
+ 2013_05_28_drive_0005_sync 0000005086 l
242
+ 2013_05_28_drive_0005_sync 0000005157 l
243
+ 2013_05_28_drive_0005_sync 0000005190 l
244
+ 2013_05_28_drive_0005_sync 0000005210 l
245
+ 2013_05_28_drive_0005_sync 0000005569 l
+ 2013_05_28_drive_0005_sync 0000005589 l
+ 2013_05_28_drive_0005_sync 0000005649 l
+ 2013_05_28_drive_0005_sync 0000005669 l
+ 2013_05_28_drive_0005_sync 0000005689 l
+ 2013_05_28_drive_0005_sync 0000005709 l
+ 2013_05_28_drive_0005_sync 0000005729 l
+ 2013_05_28_drive_0005_sync 0000005749 l
+ 2013_05_28_drive_0005_sync 0000005769 l
+ 2013_05_28_drive_0005_sync 0000005809 l
+ 2013_05_28_drive_0005_sync 0000005829 l
+ 2013_05_28_drive_0005_sync 0000005883 l
+ 2013_05_28_drive_0005_sync 0000005971 l
+ 2013_05_28_drive_0005_sync 0000005991 l
+ 2013_05_28_drive_0005_sync 0000006011 l
+ 2013_05_28_drive_0005_sync 0000006031 l
+ 2013_05_28_drive_0005_sync 0000006051 l
+ 2013_05_28_drive_0005_sync 0000006071 l
+ 2013_05_28_drive_0005_sync 0000006131 l
+ 2013_05_28_drive_0005_sync 0000006151 l
+ 2013_05_28_drive_0005_sync 0000006211 l
+ 2013_05_28_drive_0005_sync 0000006251 l
+ 2013_05_28_drive_0005_sync 0000006271 l
+ 2013_05_28_drive_0006_sync 0000000130 l
+ 2013_05_28_drive_0006_sync 0000000150 l
+ 2013_05_28_drive_0006_sync 0000000170 l
+ 2013_05_28_drive_0006_sync 0000000210 l
+ 2013_05_28_drive_0006_sync 0000000230 l
+ 2013_05_28_drive_0006_sync 0000000250 l
+ 2013_05_28_drive_0006_sync 0000000290 l
+ 2013_05_28_drive_0006_sync 0000000310 l
+ 2013_05_28_drive_0006_sync 0000000330 l
+ 2013_05_28_drive_0006_sync 0000000350 l
+ 2013_05_28_drive_0006_sync 0000000370 l
+ 2013_05_28_drive_0006_sync 0000000430 l
+ 2013_05_28_drive_0006_sync 0000000450 l
+ 2013_05_28_drive_0006_sync 0000000470 l
+ 2013_05_28_drive_0006_sync 0000000490 l
+ 2013_05_28_drive_0006_sync 0000000510 l
+ 2013_05_28_drive_0006_sync 0000000551 l
+ 2013_05_28_drive_0006_sync 0000000622 l
+ 2013_05_28_drive_0006_sync 0000000642 l
+ 2013_05_28_drive_0006_sync 0000000662 l
+ 2013_05_28_drive_0006_sync 0000000682 l
+ 2013_05_28_drive_0006_sync 0000000702 l
+ 2013_05_28_drive_0006_sync 0000000722 l
+ 2013_05_28_drive_0006_sync 0000000742 l
+ 2013_05_28_drive_0006_sync 0000000822 l
+ 2013_05_28_drive_0006_sync 0000000842 l
+ 2013_05_28_drive_0006_sync 0000000862 l
+ 2013_05_28_drive_0006_sync 0000000882 l
+ 2013_05_28_drive_0006_sync 0000000902 l
+ 2013_05_28_drive_0006_sync 0000000922 l
+ 2013_05_28_drive_0006_sync 0000000962 l
+ 2013_05_28_drive_0006_sync 0000000982 l
+ 2013_05_28_drive_0006_sync 0000001062 l
+ 2013_05_28_drive_0006_sync 0000001082 l
+ 2013_05_28_drive_0006_sync 0000001102 l
+ 2013_05_28_drive_0006_sync 0000001142 l
+ 2013_05_28_drive_0006_sync 0000001162 l
+ 2013_05_28_drive_0006_sync 0000001182 l
+ 2013_05_28_drive_0006_sync 0000001202 l
+ 2013_05_28_drive_0006_sync 0000002304 l
+ 2013_05_28_drive_0006_sync 0000002324 l
+ 2013_05_28_drive_0006_sync 0000002344 l
+ 2013_05_28_drive_0006_sync 0000002364 l
+ 2013_05_28_drive_0006_sync 0000002384 l
+ 2013_05_28_drive_0006_sync 0000002404 l
+ 2013_05_28_drive_0006_sync 0000002424 l
+ 2013_05_28_drive_0006_sync 0000002444 l
+ 2013_05_28_drive_0006_sync 0000002464 l
+ 2013_05_28_drive_0006_sync 0000002484 l
+ 2013_05_28_drive_0006_sync 0000002592 l
+ 2013_05_28_drive_0006_sync 0000002613 l
+ 2013_05_28_drive_0006_sync 0000002633 l
+ 2013_05_28_drive_0006_sync 0000002673 l
+ 2013_05_28_drive_0006_sync 0000002693 l
+ 2013_05_28_drive_0006_sync 0000002733 l
+ 2013_05_28_drive_0006_sync 0000002753 l
+ 2013_05_28_drive_0006_sync 0000002773 l
+ 2013_05_28_drive_0006_sync 0000002793 l
+ 2013_05_28_drive_0006_sync 0000009236 l
+ 2013_05_28_drive_0006_sync 0000009256 l
+ 2013_05_28_drive_0006_sync 0000009296 l
+ 2013_05_28_drive_0006_sync 0000009316 l
+ 2013_05_28_drive_0006_sync 0000009336 l
+ 2013_05_28_drive_0006_sync 0000009376 l
+ 2013_05_28_drive_0006_sync 0000009396 l
+ 2013_05_28_drive_0006_sync 0000009416 l
+ 2013_05_28_drive_0006_sync 0000009456 l
+ 2013_05_28_drive_0006_sync 0000009476 l
+ 2013_05_28_drive_0006_sync 0000009496 l
+ 2013_05_28_drive_0006_sync 0000009516 l
+ 2013_05_28_drive_0006_sync 0000009536 l
+ 2013_05_28_drive_0007_sync 0000000019 l
+ 2013_05_28_drive_0007_sync 0000000039 l
+ 2013_05_28_drive_0007_sync 0000000059 l
+ 2013_05_28_drive_0007_sync 0000000079 l
+ 2013_05_28_drive_0007_sync 0000000099 l
+ 2013_05_28_drive_0007_sync 0000000119 l
+ 2013_05_28_drive_0007_sync 0000000139 l
+ 2013_05_28_drive_0007_sync 0000000159 l
+ 2013_05_28_drive_0007_sync 0000000179 l
+ 2013_05_28_drive_0007_sync 0000000199 l
+ 2013_05_28_drive_0007_sync 0000000219 l
+ 2013_05_28_drive_0007_sync 0000000439 l
+ 2013_05_28_drive_0009_sync 0000001030 l
+ 2013_05_28_drive_0009_sync 0000001050 l
+ 2013_05_28_drive_0009_sync 0000001070 l
+ 2013_05_28_drive_0009_sync 0000001090 l
+ 2013_05_28_drive_0009_sync 0000001110 l
+ 2013_05_28_drive_0009_sync 0000001130 l
+ 2013_05_28_drive_0009_sync 0000001150 l
+ 2013_05_28_drive_0009_sync 0000001170 l
+ 2013_05_28_drive_0009_sync 0000001190 l
+ 2013_05_28_drive_0009_sync 0000001210 l
+ 2013_05_28_drive_0009_sync 0000001230 l
+ 2013_05_28_drive_0009_sync 0000001250 l
+ 2013_05_28_drive_0009_sync 0000001270 l
+ 2013_05_28_drive_0009_sync 0000001290 l
+ 2013_05_28_drive_0009_sync 0000001310 l
+ 2013_05_28_drive_0009_sync 0000001330 l
+ 2013_05_28_drive_0009_sync 0000001350 l
+ 2013_05_28_drive_0009_sync 0000001370 l
+ 2013_05_28_drive_0009_sync 0000004495 l
+ 2013_05_28_drive_0009_sync 0000004555 l
+ 2013_05_28_drive_0009_sync 0000004575 l
+ 2013_05_28_drive_0009_sync 0000004595 l
+ 2013_05_28_drive_0009_sync 0000004615 l
+ 2013_05_28_drive_0009_sync 0000004635 l
+ 2013_05_28_drive_0009_sync 0000004655 l
+ 2013_05_28_drive_0009_sync 0000004675 l
+ 2013_05_28_drive_0009_sync 0000004695 l
+ 2013_05_28_drive_0009_sync 0000004719 l
+ 2013_05_28_drive_0009_sync 0000004845 l
+ 2013_05_28_drive_0009_sync 0000004869 l
+ 2013_05_28_drive_0009_sync 0000004889 l
+ 2013_05_28_drive_0009_sync 0000005184 l
+ 2013_05_28_drive_0009_sync 0000005204 l
+ 2013_05_28_drive_0009_sync 0000005224 l
+ 2013_05_28_drive_0009_sync 0000005244 l
+ 2013_05_28_drive_0009_sync 0000005264 l
+ 2013_05_28_drive_0009_sync 0000005284 l
+ 2013_05_28_drive_0009_sync 0000005304 l
+ 2013_05_28_drive_0009_sync 0000005324 l
+ 2013_05_28_drive_0009_sync 0000005344 l
+ 2013_05_28_drive_0009_sync 0000005364 l
+ 2013_05_28_drive_0009_sync 0000005384 l
+ 2013_05_28_drive_0009_sync 0000005404 l
+ 2013_05_28_drive_0009_sync 0000005424 l
+ 2013_05_28_drive_0009_sync 0000005444 l
+ 2013_05_28_drive_0009_sync 0000005464 l
+ 2013_05_28_drive_0009_sync 0000005484 l
+ 2013_05_28_drive_0009_sync 0000005504 l
+ 2013_05_28_drive_0009_sync 0000005524 l
+ 2013_05_28_drive_0009_sync 0000005544 l
+ 2013_05_28_drive_0009_sync 0000005564 l
+ 2013_05_28_drive_0009_sync 0000005584 l
+ 2013_05_28_drive_0009_sync 0000005624 l
+ 2013_05_28_drive_0009_sync 0000005644 l
+ 2013_05_28_drive_0009_sync 0000005664 l
+ 2013_05_28_drive_0009_sync 0000005684 l
+ 2013_05_28_drive_0009_sync 0000005704 l
+ 2013_05_28_drive_0009_sync 0000006291 l
+ 2013_05_28_drive_0009_sync 0000006311 l
+ 2013_05_28_drive_0009_sync 0000006351 l
+ 2013_05_28_drive_0009_sync 0000006371 l
+ 2013_05_28_drive_0009_sync 0000006391 l
+ 2013_05_28_drive_0009_sync 0000006411 l
+ 2013_05_28_drive_0009_sync 0000006431 l
+ 2013_05_28_drive_0009_sync 0000006451 l
+ 2013_05_28_drive_0009_sync 0000006471 l
+ 2013_05_28_drive_0009_sync 0000006491 l
+ 2013_05_28_drive_0009_sync 0000006511 l
+ 2013_05_28_drive_0010_sync 0000001896 l
+ 2013_05_28_drive_0010_sync 0000001916 l
+ 2013_05_28_drive_0010_sync 0000001936 l
+ 2013_05_28_drive_0010_sync 0000001956 l
+ 2013_05_28_drive_0010_sync 0000001976 l
+ 2013_05_28_drive_0010_sync 0000001996 l
+ 2013_05_28_drive_0010_sync 0000002016 l
+ 2013_05_28_drive_0010_sync 0000002036 l
+ 2013_05_28_drive_0010_sync 0000002056 l
+ 2013_05_28_drive_0010_sync 0000002076 l
+ 2013_05_28_drive_0010_sync 0000002096 l
+ 2013_05_28_drive_0010_sync 0000002145 l
+ 2013_05_28_drive_0010_sync 0000002165 l
+ 2013_05_28_drive_0010_sync 0000002185 l
+ 2013_05_28_drive_0010_sync 0000002205 l
+ 2013_05_28_drive_0010_sync 0000002225 l
+ 2013_05_28_drive_0010_sync 0000002615 l
+ 2013_05_28_drive_0010_sync 0000002635 l
+ 2013_05_28_drive_0010_sync 0000002655 l
+ 2013_05_28_drive_0010_sync 0000002675 l
+ 2013_05_28_drive_0010_sync 0000002695 l
+ 2013_05_28_drive_0010_sync 0000002755 l
+ 2013_05_28_drive_0010_sync 0000002795 l
+ 2013_05_28_drive_0010_sync 0000002815 l
+ 2013_05_28_drive_0010_sync 0000002835 l
+ 2013_05_28_drive_0010_sync 0000002855 l
+ 2013_05_28_drive_0010_sync 0000002875 l
+ 2013_05_28_drive_0010_sync 0000002895 l
datasets/kitti_360/splits/seg/train_files.txt ADDED
The diff for this file is too large to render. See raw diff
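Note: each line of these split files appears to follow the pattern `<sequence> <frame_id> <camera_side>`, e.g. `2013_05_28_drive_0005_sync 0000005569 l` for the left camera of frame 5569 in drive 0005. Below is a minimal, hypothetical sketch of how such a split file could be parsed; the `SplitEntry`/`load_split` names are illustrative and not taken from this repository's actual loader (see `datasets/kitti_360/kitti_360_dataset.py` for the real implementation).

```python
# Hypothetical parser for split files such as
# datasets/kitti_360/splits/seg/test_files.txt.
# Assumed line format: "<sequence> <frame_id> <camera_side>".
from pathlib import Path
from typing import NamedTuple


class SplitEntry(NamedTuple):
    sequence: str  # KITTI-360 drive, e.g. "2013_05_28_drive_0005_sync"
    frame_id: int  # zero-padded frame index within the drive
    camera: str    # stereo camera side, e.g. "l" for left


def load_split(path: str) -> list[SplitEntry]:
    entries = []
    for line in Path(path).read_text().splitlines():
        if not line.strip():
            continue  # skip blank lines
        sequence, frame_id, camera = line.split()
        entries.append(SplitEntry(sequence, int(frame_id), camera))
    return entries


if __name__ == "__main__":
    split = load_split("datasets/kitti_360/splits/seg/test_files.txt")
    print(f"{len(split)} test frames, first entry: {split[0]}")
```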