Philippe Weinzaepfel committed on
Commit 3ef85e9 · 0 Parent(s):

huggingface demo
.gitattributes ADDED
@@ -0,0 +1,27 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,446 @@
1
+ PUMP
2
+ Copyright (c) 2022-present NAVER Corp.
3
+ Creative Commons Attribution-NonCommercial-ShareAlike 4.0 license
4
+
5
+ A summary of the CC BY-NC-SA 4.0 license is located here:
6
+ https://creativecommons.org/licenses/by-nc-sa/4.0/
7
+
8
+ ---
9
+
10
+ Attribution-NonCommercial-ShareAlike 4.0 International
11
+
12
+ =======================================================================
13
+
14
+ Creative Commons Corporation ("Creative Commons") is not a law firm and
15
+ does not provide legal services or legal advice. Distribution of
16
+ Creative Commons public licenses does not create a lawyer-client or
17
+ other relationship. Creative Commons makes its licenses and related
18
+ information available on an "as-is" basis. Creative Commons gives no
19
+ warranties regarding its licenses, any material licensed under their
20
+ terms and conditions, or any related information. Creative Commons
21
+ disclaims all liability for damages resulting from their use to the
22
+ fullest extent possible.
23
+
24
+ Using Creative Commons Public Licenses
25
+
26
+ Creative Commons public licenses provide a standard set of terms and
27
+ conditions that creators and other rights holders may use to share
28
+ original works of authorship and other material subject to copyright
29
+ and certain other rights specified in the public license below. The
30
+ following considerations are for informational purposes only, are not
31
+ exhaustive, and do not form part of our licenses.
32
+
33
+ Considerations for licensors: Our public licenses are
34
+ intended for use by those authorized to give the public
35
+ permission to use material in ways otherwise restricted by
36
+ copyright and certain other rights. Our licenses are
37
+ irrevocable. Licensors should read and understand the terms
38
+ and conditions of the license they choose before applying it.
39
+ Licensors should also secure all rights necessary before
40
+ applying our licenses so that the public can reuse the
41
+ material as expected. Licensors should clearly mark any
42
+ material not subject to the license. This includes other CC-
43
+ licensed material, or material used under an exception or
44
+ limitation to copyright. More considerations for licensors:
45
+ wiki.creativecommons.org/Considerations_for_licensors
46
+
47
+ Considerations for the public: By using one of our public
48
+ licenses, a licensor grants the public permission to use the
49
+ licensed material under specified terms and conditions. If
50
+ the licensor's permission is not necessary for any reason--for
51
+ example, because of any applicable exception or limitation to
52
+ copyright--then that use is not regulated by the license. Our
53
+ licenses grant only permissions under copyright and certain
54
+ other rights that a licensor has authority to grant. Use of
55
+ the licensed material may still be restricted for other
56
+ reasons, including because others have copyright or other
57
+ rights in the material. A licensor may make special requests,
58
+ such as asking that all changes be marked or described.
59
+ Although not required by our licenses, you are encouraged to
60
+ respect those requests where reasonable. More considerations
61
+ for the public:
62
+ wiki.creativecommons.org/Considerations_for_licensees
63
+
64
+ =======================================================================
65
+
66
+ Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
67
+ Public License
68
+
69
+ By exercising the Licensed Rights (defined below), You accept and agree
70
+ to be bound by the terms and conditions of this Creative Commons
71
+ Attribution-NonCommercial-ShareAlike 4.0 International Public License
72
+ ("Public License"). To the extent this Public License may be
73
+ interpreted as a contract, You are granted the Licensed Rights in
74
+ consideration of Your acceptance of these terms and conditions, and the
75
+ Licensor grants You such rights in consideration of benefits the
76
+ Licensor receives from making the Licensed Material available under
77
+ these terms and conditions.
78
+
79
+
80
+ Section 1 -- Definitions.
81
+
82
+ a. Adapted Material means material subject to Copyright and Similar
83
+ Rights that is derived from or based upon the Licensed Material
84
+ and in which the Licensed Material is translated, altered,
85
+ arranged, transformed, or otherwise modified in a manner requiring
86
+ permission under the Copyright and Similar Rights held by the
87
+ Licensor. For purposes of this Public License, where the Licensed
88
+ Material is a musical work, performance, or sound recording,
89
+ Adapted Material is always produced where the Licensed Material is
90
+ synched in timed relation with a moving image.
91
+
92
+ b. Adapter's License means the license You apply to Your Copyright
93
+ and Similar Rights in Your contributions to Adapted Material in
94
+ accordance with the terms and conditions of this Public License.
95
+
96
+ c. BY-NC-SA Compatible License means a license listed at
97
+ creativecommons.org/compatiblelicenses, approved by Creative
98
+ Commons as essentially the equivalent of this Public License.
99
+
100
+ d. Copyright and Similar Rights means copyright and/or similar rights
101
+ closely related to copyright including, without limitation,
102
+ performance, broadcast, sound recording, and Sui Generis Database
103
+ Rights, without regard to how the rights are labeled or
104
+ categorized. For purposes of this Public License, the rights
105
+ specified in Section 2(b)(1)-(2) are not Copyright and Similar
106
+ Rights.
107
+
108
+ e. Effective Technological Measures means those measures that, in the
109
+ absence of proper authority, may not be circumvented under laws
110
+ fulfilling obligations under Article 11 of the WIPO Copyright
111
+ Treaty adopted on December 20, 1996, and/or similar international
112
+ agreements.
113
+
114
+ f. Exceptions and Limitations means fair use, fair dealing, and/or
115
+ any other exception or limitation to Copyright and Similar Rights
116
+ that applies to Your use of the Licensed Material.
117
+
118
+ g. License Elements means the license attributes listed in the name
119
+ of a Creative Commons Public License. The License Elements of this
120
+ Public License are Attribution, NonCommercial, and ShareAlike.
121
+
122
+ h. Licensed Material means the artistic or literary work, database,
123
+ or other material to which the Licensor applied this Public
124
+ License.
125
+
126
+ i. Licensed Rights means the rights granted to You subject to the
127
+ terms and conditions of this Public License, which are limited to
128
+ all Copyright and Similar Rights that apply to Your use of the
129
+ Licensed Material and that the Licensor has authority to license.
130
+
131
+ j. Licensor means the individual(s) or entity(ies) granting rights
132
+ under this Public License.
133
+
134
+ k. NonCommercial means not primarily intended for or directed towards
135
+ commercial advantage or monetary compensation. For purposes of
136
+ this Public License, the exchange of the Licensed Material for
137
+ other material subject to Copyright and Similar Rights by digital
138
+ file-sharing or similar means is NonCommercial provided there is
139
+ no payment of monetary compensation in connection with the
140
+ exchange.
141
+
142
+ l. Share means to provide material to the public by any means or
143
+ process that requires permission under the Licensed Rights, such
144
+ as reproduction, public display, public performance, distribution,
145
+ dissemination, communication, or importation, and to make material
146
+ available to the public including in ways that members of the
147
+ public may access the material from a place and at a time
148
+ individually chosen by them.
149
+
150
+ m. Sui Generis Database Rights means rights other than copyright
151
+ resulting from Directive 96/9/EC of the European Parliament and of
152
+ the Council of 11 March 1996 on the legal protection of databases,
153
+ as amended and/or succeeded, as well as other essentially
154
+ equivalent rights anywhere in the world.
155
+
156
+ n. You means the individual or entity exercising the Licensed Rights
157
+ under this Public License. Your has a corresponding meaning.
158
+
159
+
160
+ Section 2 -- Scope.
161
+
162
+ a. License grant.
163
+
164
+ 1. Subject to the terms and conditions of this Public License,
165
+ the Licensor hereby grants You a worldwide, royalty-free,
166
+ non-sublicensable, non-exclusive, irrevocable license to
167
+ exercise the Licensed Rights in the Licensed Material to:
168
+
169
+ a. reproduce and Share the Licensed Material, in whole or
170
+ in part, for NonCommercial purposes only; and
171
+
172
+ b. produce, reproduce, and Share Adapted Material for
173
+ NonCommercial purposes only.
174
+
175
+ 2. Exceptions and Limitations. For the avoidance of doubt, where
176
+ Exceptions and Limitations apply to Your use, this Public
177
+ License does not apply, and You do not need to comply with
178
+ its terms and conditions.
179
+
180
+ 3. Term. The term of this Public License is specified in Section
181
+ 6(a).
182
+
183
+ 4. Media and formats; technical modifications allowed. The
184
+ Licensor authorizes You to exercise the Licensed Rights in
185
+ all media and formats whether now known or hereafter created,
186
+ and to make technical modifications necessary to do so. The
187
+ Licensor waives and/or agrees not to assert any right or
188
+ authority to forbid You from making technical modifications
189
+ necessary to exercise the Licensed Rights, including
190
+ technical modifications necessary to circumvent Effective
191
+ Technological Measures. For purposes of this Public License,
192
+ simply making modifications authorized by this Section 2(a)
193
+ (4) never produces Adapted Material.
194
+
195
+ 5. Downstream recipients.
196
+
197
+ a. Offer from the Licensor -- Licensed Material. Every
198
+ recipient of the Licensed Material automatically
199
+ receives an offer from the Licensor to exercise the
200
+ Licensed Rights under the terms and conditions of this
201
+ Public License.
202
+
203
+ b. Additional offer from the Licensor -- Adapted Material.
204
+ Every recipient of Adapted Material from You
205
+ automatically receives an offer from the Licensor to
206
+ exercise the Licensed Rights in the Adapted Material
207
+ under the conditions of the Adapter's License You apply.
208
+
209
+ c. No downstream restrictions. You may not offer or impose
210
+ any additional or different terms or conditions on, or
211
+ apply any Effective Technological Measures to, the
212
+ Licensed Material if doing so restricts exercise of the
213
+ Licensed Rights by any recipient of the Licensed
214
+ Material.
215
+
216
+ 6. No endorsement. Nothing in this Public License constitutes or
217
+ may be construed as permission to assert or imply that You
218
+ are, or that Your use of the Licensed Material is, connected
219
+ with, or sponsored, endorsed, or granted official status by,
220
+ the Licensor or others designated to receive attribution as
221
+ provided in Section 3(a)(1)(A)(i).
222
+
223
+ b. Other rights.
224
+
225
+ 1. Moral rights, such as the right of integrity, are not
226
+ licensed under this Public License, nor are publicity,
227
+ privacy, and/or other similar personality rights; however, to
228
+ the extent possible, the Licensor waives and/or agrees not to
229
+ assert any such rights held by the Licensor to the limited
230
+ extent necessary to allow You to exercise the Licensed
231
+ Rights, but not otherwise.
232
+
233
+ 2. Patent and trademark rights are not licensed under this
234
+ Public License.
235
+
236
+ 3. To the extent possible, the Licensor waives any right to
237
+ collect royalties from You for the exercise of the Licensed
238
+ Rights, whether directly or through a collecting society
239
+ under any voluntary or waivable statutory or compulsory
240
+ licensing scheme. In all other cases the Licensor expressly
241
+ reserves any right to collect such royalties, including when
242
+ the Licensed Material is used other than for NonCommercial
243
+ purposes.
244
+
245
+
246
+ Section 3 -- License Conditions.
247
+
248
+ Your exercise of the Licensed Rights is expressly made subject to the
249
+ following conditions.
250
+
251
+ a. Attribution.
252
+
253
+ 1. If You Share the Licensed Material (including in modified
254
+ form), You must:
255
+
256
+ a. retain the following if it is supplied by the Licensor
257
+ with the Licensed Material:
258
+
259
+ i. identification of the creator(s) of the Licensed
260
+ Material and any others designated to receive
261
+ attribution, in any reasonable manner requested by
262
+ the Licensor (including by pseudonym if
263
+ designated);
264
+
265
+ ii. a copyright notice;
266
+
267
+ iii. a notice that refers to this Public License;
268
+
269
+ iv. a notice that refers to the disclaimer of
270
+ warranties;
271
+
272
+ v. a URI or hyperlink to the Licensed Material to the
273
+ extent reasonably practicable;
274
+
275
+ b. indicate if You modified the Licensed Material and
276
+ retain an indication of any previous modifications; and
277
+
278
+ c. indicate the Licensed Material is licensed under this
279
+ Public License, and include the text of, or the URI or
280
+ hyperlink to, this Public License.
281
+
282
+ 2. You may satisfy the conditions in Section 3(a)(1) in any
283
+ reasonable manner based on the medium, means, and context in
284
+ which You Share the Licensed Material. For example, it may be
285
+ reasonable to satisfy the conditions by providing a URI or
286
+ hyperlink to a resource that includes the required
287
+ information.
288
+ 3. If requested by the Licensor, You must remove any of the
289
+ information required by Section 3(a)(1)(A) to the extent
290
+ reasonably practicable.
291
+
292
+ b. ShareAlike.
293
+
294
+ In addition to the conditions in Section 3(a), if You Share
295
+ Adapted Material You produce, the following conditions also apply.
296
+
297
+ 1. The Adapter's License You apply must be a Creative Commons
298
+ license with the same License Elements, this version or
299
+ later, or a BY-NC-SA Compatible License.
300
+
301
+ 2. You must include the text of, or the URI or hyperlink to, the
302
+ Adapter's License You apply. You may satisfy this condition
303
+ in any reasonable manner based on the medium, means, and
304
+ context in which You Share Adapted Material.
305
+
306
+ 3. You may not offer or impose any additional or different terms
307
+ or conditions on, or apply any Effective Technological
308
+ Measures to, Adapted Material that restrict exercise of the
309
+ rights granted under the Adapter's License You apply.
310
+
311
+
312
+ Section 4 -- Sui Generis Database Rights.
313
+
314
+ Where the Licensed Rights include Sui Generis Database Rights that
315
+ apply to Your use of the Licensed Material:
316
+
317
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right
318
+ to extract, reuse, reproduce, and Share all or a substantial
319
+ portion of the contents of the database for NonCommercial purposes
320
+ only;
321
+
322
+ b. if You include all or a substantial portion of the database
323
+ contents in a database in which You have Sui Generis Database
324
+ Rights, then the database in which You have Sui Generis Database
325
+ Rights (but not its individual contents) is Adapted Material,
326
+ including for purposes of Section 3(b); and
327
+
328
+ c. You must comply with the conditions in Section 3(a) if You Share
329
+ all or a substantial portion of the contents of the database.
330
+
331
+ For the avoidance of doubt, this Section 4 supplements and does not
332
+ replace Your obligations under this Public License where the Licensed
333
+ Rights include other Copyright and Similar Rights.
334
+
335
+
336
+ Section 5 -- Disclaimer of Warranties and Limitation of Liability.
337
+
338
+ a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
339
+ EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
340
+ AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
341
+ ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
342
+ IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
343
+ WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
344
+ PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
345
+ ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
346
+ KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
347
+ ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
348
+
349
+ b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
350
+ TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
351
+ NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
352
+ INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
353
+ COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
354
+ USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
355
+ ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
356
+ DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
357
+ IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
358
+
359
+ c. The disclaimer of warranties and limitation of liability provided
360
+ above shall be interpreted in a manner that, to the extent
361
+ possible, most closely approximates an absolute disclaimer and
362
+ waiver of all liability.
363
+
364
+
365
+ Section 6 -- Term and Termination.
366
+
367
+ a. This Public License applies for the term of the Copyright and
368
+ Similar Rights licensed here. However, if You fail to comply with
369
+ this Public License, then Your rights under this Public License
370
+ terminate automatically.
371
+
372
+ b. Where Your right to use the Licensed Material has terminated under
373
+ Section 6(a), it reinstates:
374
+
375
+ 1. automatically as of the date the violation is cured, provided
376
+ it is cured within 30 days of Your discovery of the
377
+ violation; or
378
+
379
+ 2. upon express reinstatement by the Licensor.
380
+
381
+ For the avoidance of doubt, this Section 6(b) does not affect any
382
+ right the Licensor may have to seek remedies for Your violations
383
+ of this Public License.
384
+
385
+ c. For the avoidance of doubt, the Licensor may also offer the
386
+ Licensed Material under separate terms or conditions or stop
387
+ distributing the Licensed Material at any time; however, doing so
388
+ will not terminate this Public License.
389
+
390
+ d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
391
+ License.
392
+
393
+
394
+ Section 7 -- Other Terms and Conditions.
395
+
396
+ a. The Licensor shall not be bound by any additional or different
397
+ terms or conditions communicated by You unless expressly agreed.
398
+
399
+ b. Any arrangements, understandings, or agreements regarding the
400
+ Licensed Material not stated herein are separate from and
401
+ independent of the terms and conditions of this Public License.
402
+
403
+
404
+ Section 8 -- Interpretation.
405
+
406
+ a. For the avoidance of doubt, this Public License does not, and
407
+ shall not be interpreted to, reduce, limit, restrict, or impose
408
+ conditions on any use of the Licensed Material that could lawfully
409
+ be made without permission under this Public License.
410
+
411
+ b. To the extent possible, if any provision of this Public License is
412
+ deemed unenforceable, it shall be automatically reformed to the
413
+ minimum extent necessary to make it enforceable. If the provision
414
+ cannot be reformed, it shall be severed from this Public License
415
+ without affecting the enforceability of the remaining terms and
416
+ conditions.
417
+
418
+ c. No term or condition of this Public License will be waived and no
419
+ failure to comply consented to unless expressly agreed to by the
420
+ Licensor.
421
+
422
+ d. Nothing in this Public License constitutes or may be interpreted
423
+ as a limitation upon, or waiver of, any privileges and immunities
424
+ that apply to the Licensor or You, including from the legal
425
+ processes of any jurisdiction or authority.
426
+
427
+ =======================================================================
428
+
429
+ Creative Commons is not a party to its public
430
+ licenses. Notwithstanding, Creative Commons may elect to apply one of
431
+ its public licenses to material it publishes and in those instances
432
+ will be considered the “Licensor.” The text of the Creative Commons
433
+ public licenses is dedicated to the public domain under the CC0 Public
434
+ Domain Dedication. Except for the limited purpose of indicating that
435
+ material is shared under a Creative Commons public license or as
436
+ otherwise permitted by the Creative Commons policies published at
437
+ creativecommons.org/policies, Creative Commons does not authorize the
438
+ use of the trademark "Creative Commons" or any other trademark or logo
439
+ of Creative Commons without its prior written consent including,
440
+ without limitation, in connection with any unauthorized modifications
441
+ to any of its public licenses or any other arrangements,
442
+ understandings, or agreements concerning use of licensed material. For
443
+ the avoidance of doubt, this paragraph does not form part of the
444
+ public licenses.
445
+
446
+ Creative Commons may be contacted at creativecommons.org.
NOTICE ADDED
@@ -0,0 +1,46 @@
1
+ PUMP
2
+ Copyright (c) 2022-present NAVER Corp.
3
+ Creative Commons Attribution-NonCommercial-ShareAlike 4.0 license
4
+
5
+ --------------------------------------------------------------------------------------
6
+
7
+ This project contains subcomponents with separate copyright notices and license terms.
8
+ Your use of the source code for these subcomponents is subject to the terms and conditions of the following licenses.
9
+
10
+ =====
11
+
12
+ pytorch/vision
13
+ https://github.com/pytorch/vision
14
+
15
+
16
+ BSD 3-Clause License
17
+
18
+ Copyright (c) Soumith Chintala 2016,
19
+ All rights reserved.
20
+
21
+ Redistribution and use in source and binary forms, with or without
22
+ modification, are permitted provided that the following conditions are met:
23
+
24
+ * Redistributions of source code must retain the above copyright notice, this
25
+ list of conditions and the following disclaimer.
26
+
27
+ * Redistributions in binary form must reproduce the above copyright notice,
28
+ this list of conditions and the following disclaimer in the documentation
29
+ and/or other materials provided with the distribution.
30
+
31
+ * Neither the name of the copyright holder nor the names of its
32
+ contributors may be used to endorse or promote products derived from
33
+ this software without specific prior written permission.
34
+
35
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
36
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
39
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
41
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
42
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
43
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45
+
46
+ =====
README.md ADDED
@@ -0,0 +1,163 @@
1
+ ---
2
+ title: PUMP
3
+ emoji: 📚
4
+ colorFrom: yellow
5
+ colorTo: red
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: false
9
+ ---
10
+
11
+ # PUMP: pyramidal and uniqueness matching priors for unsupervised learning of local features #
12
+ ![image](imgs/teaser_paper.jpg)
13
+
14
+ Official repository for the following [paper](https://europe.naverlabs.com/research/publications/pump-pyramidal-and-uniqueness-matching-priors-for-unsupervised-learning-of-local-features/):
15
+
16
+ ```text
17
+ @inproceedings{cvpr22_pump,
18
+ author = {Jerome Revaud and Vincent Leroy and Philippe Weinzaepfel and Boris Chidlovskii},
19
+ title = {PUMP: pyramidal and uniqueness matching priors for unsupervised learning of local features},
20
+ booktitle = {CVPR},
21
+ year = {2022},
22
+ }
23
+ ```
24
+ ![image](imgs/overview.png)
25
+
26
+ License
27
+ -------
28
+ Our code is released under the CC BY-NC-SA 4.0 License (see [LICENSE](LICENSE) for more details) and is available for non-commercial use only.
29
+
30
+
31
+ Requirements
32
+ ------------
33
+ - Python 3.8+ equipped with standard scientific packages and PyTorch / TorchVision:
34
+ ```
35
+ tqdm >= 4
36
+ PIL >= 8.1.1
37
+ numpy >= 1.19
38
+ scipy >= 1.6
39
+ torch >= 1.10.0
40
+ torchvision >= 0.9.0
41
+ matplotlib >= 3.3.4
42
+ ```
43
+ - the CUDA toolkit, to compile the custom CUDA kernels
44
+ ```bash
45
+ cd core/cuda_deepm/
46
+ python setup.py install
47
+ ```
48
+
49
+ Warping Demo
50
+ ------------
51
+
52
+ ```bash
53
+ python demo_warping.py
54
+ ```
55
+
56
+ You should see the following result:
57
+ ![image](imgs/demo_warp.jpg)
58
+
59
+ Test usage
60
+ ----------
61
+
62
+ We provide 4 variations of the pairwise matching code, named `test_xxxscale_yyy.py`:
63
+ - xxx: `single`-scale or `multi`-scale.
64
+ Single-scale can cope with 0.75~1.33x scale difference at most.
65
+ The multi-scale version can additionally be made rotation-invariant if requested.
66
+ - yyy: recursive or not. The recursive version is slower but provides denser and better outputs.
67
+
68
+ For most cases, you want to use `test_multiscale.py`:
69
+ ```bash
70
+ python test_multiscale.py
71
+ --img1 path/to/img1
72
+ --img2 path/to/img2
73
+ --resize 600 # important, see below
74
+ --post-filter
75
+ --output path/to/correspondences.npy
76
+ ```
77
+
78
+ It outputs a numpy binary file with the field `file_data['corres']` containing a list of correspondences.
79
+ The row format is `[x1, y1, x2, y2, score, scale_rot_code]`.
80
+ Use `core.functional.decode_scale_rot(code) --> (scale, angle_in_degrees)` to decode the `scale_rot_code`.
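+ 
+ A minimal sketch of reading this file in Python (the path matches the `--output` placeholder above; the `allow_pickle`/`.item()` loading convention is an assumption about how the dictionary was saved):
+ ```python
+ import numpy as np
+ from core.functional import decode_scale_rot
+ 
+ # assumption: the .npy stores a pickled dict with a 'corres' array
+ file_data = np.load('path/to/correspondences.npy', allow_pickle=True).item()
+ corres = file_data['corres']  # rows: [x1, y1, x2, y2, score, scale_rot_code]
+ for x1, y1, x2, y2, score, code in corres:
+     scale, angle = decode_scale_rot(code)  # angle in degrees
+     print(f'({x1:.1f},{y1:.1f}) -> ({x2:.1f},{y2:.1f})  score={score:.2f}  scale={scale:.2f}  rot={angle:.1f}')
+ ```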
81
+
82
+
83
+ #### Optional parameters:
84
+
85
+ - **Prior image resize**: `--resize SIZE`
86
+
87
+ This is a very important parameter. In general, the bigger, the better (and slower).
88
+ Be aware that the memory footprint explodes with the image size.
89
+ Here is the table of maximum `--resize` values depending on the image aspect-ratio:
90
+
91
+ | Aspect-ratio | Example img sizes | GPU memory | resize |
92
+ |--------------|--------------------|------------|--------|
93
+ | 4/3 | 800x600, 1024x768 | 16 GB | 600 |
94
+ | 4/3 | 800x600, 1024x768 | 22 GB | 680 |
95
+ | 4/3 | 800x600, 1024x768 | 32 GB | 760 |
96
+ | 1/1 | 1024x1024 | 16 GB | 540 |
97
+ | 1/1 | 1024x1024 | 22 GB | 600 |
98
+ | 1/1 | 1024x1024 | 32 GB | 660 |
99
+
100
+ (Formula: `memory_in_bytes = (W1*H1*W2*H2)*1.333*2/16`)
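+ 
+ As a rough sanity check of this formula (assuming `--resize` bounds the longest image side, so a 4/3 pair at `--resize 600` is about 600x450 pixels per image):
+ ```python
+ # GPU memory estimate from the formula above, in GB
+ def pump_mem_gb(w1, h1, w2, h2):
+     return (w1 * h1 * w2 * h2) * 1.333 * 2 / 16 / 1e9
+ 
+ print(pump_mem_gb(600, 450, 600, 450))  # ~12.1 GB, consistent with the 16 GB / resize 600 row
+ ```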
101
+
102
+ - **Base descriptor**: `--desc {PUMP, PUMP-stytrf}`
103
+
104
+ We provide the `PUMP` descriptor from our paper, as well as `PUMP-stytrf` (with additional style-transfer training).
105
+ Defaults to `PUMP-stytrf`.
106
+
107
+ - **Scale**: `--max-scale SCALE`
108
+
109
+ By default, this value is set to 4, meaning that PUMP is _at least_ invariant to a 4x zoom-in or
110
+ zoom-out. In practically all cases, this is more than enough. You may reduce this value to speed up
111
+ computation if you know such a large scale range is unnecessary.
112
+
113
+ - **Rotation**: `--max-rot DEGREES`
114
+
115
+ By default, PUMP is not rotation-invariant. To enforce rotation invariance, you need to specify
116
+ the amount of rotation it can tolerate. The more, the slower. Maximum value is 180.
117
+ If you know that images are not vertically oriented, you can just use 90 degrees.
118
+
119
+ - **post-filter**: `--post-filter "option1=val1,option2=val2,..."`
120
+
121
+ When activated, post-filtering removes spurious correspondences based on their local consistency.
122
+ See `python post_filter.py --help` for details about the possible options.
123
+ It is geometry-agnostic and naturally supports dynamic scenes.
124
+ If you want to output _pixel-dense_ correspondences (a.k.a. _optical flow_), you need to post-process
125
+ the correspondences with `--post-filter densify=True`. See `demo_warping.py` for an example.
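+ 
+ For example, to additionally densify the output (the image paths are placeholders, and further post-filter options can be appended inside the same quoted string):
+ ```bash
+ python test_multiscale.py --img1 path/to/img1 --img2 path/to/img2 --resize 600 \
+     --post-filter "densify=True" --output path/to/dense_correspondences.npy
+ ```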
126
+
127
+
128
+ #### Visualization of results:
129
+ ```bash
130
+ python -m tools.viz --img1 path/to/img1 --img2 path/to/img2 --corres path/to/correspondences.npy
131
+ ```
132
+
133
+ Reproducing results on the ETH-3D dataset
134
+ -----------------------------------------
135
+
136
+ 1. Download the ETH-3D dataset from [their website](https://www.eth3d.net/datasets) and extract it in `datasets/eth3d/`
137
+
138
+ 2. Run the code `python run_ETH3D.py`. You should get results slightly better than those reported in the paper.
139
+
140
+
141
+ Training PUMP from scratch
142
+ --------------------------
143
+
144
+ 1. Download the training data with
145
+ ```bash
146
+ bash download_training_data.sh
147
+ ```
148
+
149
+ This consists of web images from [this paper](http://cmp.felk.cvut.cz/revisitop/) for the self-supervised loss (as in [R2D2](https://github.com/naver/r2d2))
150
+ and image pairs from the [SfM120k dataset](http://cmp.felk.cvut.cz/cnnimageretrieval/) with automatically
151
+ extracted pixel correspondences. Note that correspondences are *not* used in the loss, since the loss is
152
+ unsupervised. They are only necessary so that random cropping produces pairs of crops at least partially aligned.
153
+ Therefore, correspondences do not need to be 100% correct or even pixel-precise.
154
+
155
+ 2. Run `python train.py --save-path <output_dir>/`
156
+
157
+ Note that the training code is quite rudimentary (only supports `nn.DataParallel`,
158
+ no `DistributedDataParallel` support at the moment, and no validation phase either).
159
+
160
+ 3. Move and rename your final checkpoint to `checkpoints/NAME.pt` and test it with
161
+ ```bash
162
+ python test_multiscale.py ... --desc NAME
163
+ ```
app.py ADDED
@@ -0,0 +1,77 @@
1
+ import gradio as gr
2
+ import sys, os
3
+ import torch
4
+ import matplotlib.pylab as plt
5
+
6
+ def pump_matching(img1, img2, trained_with_st=False, scale=300, max_scale=1, max_rot=0, use_gpu=False):
7
+
8
+ use_singlescale = max_scale==1 and max_rot==0
9
+ if use_singlescale: # single
10
+ from test_singlescale import Main, arg_parser
11
+ else:
12
+ from test_multiscale import Main, arg_parser
13
+ parser = arg_parser()
14
+
15
+ args_list = ['--img1','dummy','--img2','dummy','--post-filter', '--desc','PUMP-stytrf' if trained_with_st else 'PUMP','--resize',str(scale)]
16
+ if not use_gpu:
17
+ args_list += ['--device', 'cpu']
18
+ if not use_singlescale:
19
+ args_list += ['--max-scale',str(max_scale),'--max-rot',str(max_rot)]
20
+
21
+ args = parser.parse_args(args_list)
22
+
23
+ corres = Main().run_from_args_with_images(img1, img2, args)
24
+
25
+ fig1 = plt.figure(1)
26
+ plt.imshow(img1)
27
+ ax1 = plt.gca()
28
+ ax1.axis('off')
29
+ plt.tight_layout()
30
+
31
+ fig2 = plt.figure(2)
32
+ plt.imshow(img2)
33
+ ax2 = plt.gca()
34
+ ax2.axis('off')
35
+ plt.tight_layout()
36
+
37
+ from tools.viz import plot_grid
38
+ if corres.shape[-1] > 4:
39
+ corres = corres[corres[:,4]>0,:] # select non-null correspondences
40
+ if corres.shape[0]>0: plot_grid(corres, ax1, ax2, marker='+')
41
+
42
+ img1 = None
43
+ img2 = None
44
+
45
+ return fig1, fig2
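+ 
+ # Usage sketch outside of Gradio (the image paths below are the demo examples
+ # declared further down; saving the figures to disk is just an illustration):
+ #   from PIL import Image
+ #   fig1, fig2 = pump_matching(Image.open('datasets/demo_warp/mountains_src.jpg'),
+ #                              Image.open('datasets/demo_warp/mountains_tgt.jpg'))
+ #   fig1.savefig('matches_img1.png'); fig2.savefig('matches_img2.png')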
46
+
47
+ has_cuda = torch.cuda.is_available() and torch.cuda.device_count()>0
48
+
49
+ title = "PUMP local descriptor demo"
50
+ description = "This is a visualization demo for the PUMP local descriptors presented in our CVPR 2022 paper <b><a href='https://europe.naverlabs.com/research/publications/pump-pyramidal-and-uniqueness-matching-priors-for-unsupervised-learning-of-local-features/' target='_blank'>PUMP: Pyramidal and Uniqueness Matching Priors for Unsupervised Learning of Local Features</a></b>.</p><p><b>WARNING:</b> this demo runs on CPU with downscaled images, without multi-scale or multi-rotation testing, due to limited memory and computational resources; please check out our <a href='https://github.com/naver/pump' target='_blank'>original github repo</a> for these features.</p>"
51
+
52
+ article = "<p style='text-align: center'><a href='https://github.com/naver/pump' target='_blank'>Original Github Repo</a></p>"
53
+
54
+ iface = gr.Interface(
55
+ fn=pump_matching,
56
+ inputs=[
57
+ gr.inputs.Image(shape=(1024, 1024), type="pil", label="First Image"),
58
+ gr.inputs.Image(shape=(1024, 1024), type="pil", label="Second Image"),
59
+ gr.inputs.Checkbox(default=False, label="Use the model trained with style transfer"),
60
+ #gr.inputs.Slider(minimum=300, maximum=600, default=400, step=1, label="Original test scale"),
61
+ #gr.inputs.Slider(minimum=1, maximum=4, default=1, step=0.1, label="Multi Scale Testing - maximum scale (makes it slower)"),
62
+ #gr.inputs.Slider(minimum=0, maximum=180, default=0, step=45, label="Multi Rotation Testing - max rot (makes it slower)"),]
63
+ #+ ([gr.inputs.Checkbox(default=True, label='Use GPU instead of CPU')] if has_cuda else []),
64
+ ],
65
+ outputs=[
66
+ gr.outputs.Image(type="plot", label="Matches in the first image"),
67
+ gr.outputs.Image(type="plot", label="Matches in the second image"),
68
+ ],
69
+ title=title,
70
+ theme='peach',
71
+ description=description,
72
+ article=article,
73
+ examples=[
74
+ ['datasets/demo_warp/mountains_src.jpg','datasets/demo_warp/mountains_tgt.jpg',False],#,400,1,0]+([True] if has_cuda else []),
75
+ ]
76
+ )
77
+ iface.launch(enable_queue=True)
checkpoints/PUMP-stytrf.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78a9bbbd8a6c9823265adf41b4a330f87fa58fb07832d6d56c6ae94769fd27d
3
+ size 13976029
checkpoints/PUMP.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a58cf5a1a4699e087c269ec9054c35637cd056fc68a37f1ee96da6b53e0804f
3
+ size 13976029
core/conv_mixer.py ADDED
@@ -0,0 +1,87 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+
11
+ """ From the ICLR22 paper: Patches are all you need
12
+ https://openreview.net/pdf?id=TVHS5Y4dNvM
13
+ """
14
+
15
+ class Residual(nn.Module):
16
+ def __init__(self, fn, stride=1):
17
+ super().__init__()
18
+ self.fn = fn
19
+ self.stride = stride
20
+
21
+ def forward(self, x):
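+ # subsample both the identity branch and fn(x) with the same stride so the residual sum keeps matching spatial shapes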
22
+ s = slice(None,None,self.stride)
23
+ return x[:,:,s,s] + self.fn(x)[:,:,s,s]
24
+
25
+
26
+ class ConvMixer (nn.Sequential):
27
+ """ Modified ConvMixer with convolutional layers at the bottom.
28
+
29
+ From the ICLR22 paper: Patches are all you need, https://openreview.net/pdf?id=TVHS5Y4dNvM
30
+ """
31
+ def __init__(self, output_dim, hidden_dim,
32
+ depth=None, kernel_size=5, patch_size=8, group_size=1,
33
+ preconv=1, faster=True, relu=nn.ReLU):
34
+
35
+ assert kernel_size % 2 == 1, 'kernel_size must be odd'
36
+ output_step = 1 + faster
37
+ assert patch_size % output_step == 0, f'patch_size must be multiple of {output_step}'
38
+ self.patch_size = patch_size
39
+
40
+ hidden_dims = [hidden_dim//4]*preconv + [hidden_dim]*(depth+1)
41
+ ops = [
42
+ nn.Conv2d(3, hidden_dims[0], kernel_size=5, padding=2),
43
+ relu(),
44
+ nn.BatchNorm2d(hidden_dims[0])]
45
+
46
+ for _ in range(1,preconv):
47
+ ops += [
48
+ nn.Conv2d(hidden_dims.pop(0), hidden_dims[0], kernel_size=3, padding=1),
49
+ relu(),
50
+ nn.BatchNorm2d(hidden_dims[0])]
51
+
52
+ ops += [
53
+ nn.Conv2d(hidden_dims.pop(0), hidden_dims[0], kernel_size=patch_size, stride=patch_size),
54
+ relu(),
55
+ nn.BatchNorm2d(hidden_dims[0])]
56
+
57
+ for idim, odim in zip(hidden_dims[0:], hidden_dims[1:]):
58
+ ops += [Residual(nn.Sequential(
59
+ nn.Conv2d(idim, idim, kernel_size, groups=max(1,idim//group_size), padding=kernel_size//2),
60
+ relu(),
61
+ nn.BatchNorm2d(idim)
62
+ )),
63
+ nn.Conv2d(idim, odim, kernel_size=1),
64
+ relu(),
65
+ nn.BatchNorm2d(odim)]
66
+ ops += [
67
+ nn.Conv2d(odim, output_dim*(patch_size//output_step)**2, kernel_size=1),
68
+ nn.PixelShuffle( patch_size//output_step ),
69
+ nn.Upsample(scale_factor=output_step, mode='bilinear', align_corners=False)]
70
+
71
+ super().__init__(*ops)
72
+
73
+ def forward(self, img):
74
+ assert img.ndim == 4
75
+ B, C, H, W = img.shape
76
+ desc = super().forward(img)
77
+ return F.normalize(desc, dim=-3)
78
+
79
+
80
+ if __name__ == '__main__':
81
+ net = ConvMixer(128, 512, 7, patch_size=4, kernel_size=9)
82
+ print(net)
83
+
84
+ img = torch.rand(2,3,256,256)
85
+ print('input.shape =', img.shape)
86
+ desc = net(img)
87
+ print('desc.shape =', desc.shape)
core/cuda_deepm/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.so
2
+ _ext*
3
+ __pycache__
4
+ build
core/cuda_deepm/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ # run `python setup.py install`
6
+ import cuda_deepm as _kernels
7
+
8
+ __all__ = {k:v for k,v in vars(_kernels).items() if k[0] != '_'}
9
+ globals().update(__all__)
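+ 
+ # Usage sketch (assumes the extension has been built with `python setup.py install`
+ # as noted above; max_pool3d expects a 4-D CUDA tensor, see func.cpp):
+ #   import torch
+ #   from core import cuda_deepm
+ #   maxima, indices = cuda_deepm.max_pool3d(torch.rand(2, 8, 64, 64).cuda(), 3, 2)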
core/cuda_deepm/func.cpp ADDED
@@ -0,0 +1,215 @@
1
+ // Copyright 2022-present NAVER Corp.
2
+ // CC BY-NC-SA 4.0
3
+ // Available only for non-commercial use
4
+
5
+ #include <torch/extension.h>
6
+ using namespace torch::indexing; // Slice
7
+ #include <vector>
8
+
9
+ #define MIN(x, y) ((x) < (y) ? (x) : (y))
10
+ #define MAX(x, y) ((x) < (y) ? (y) : (x))
11
+ #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
12
+ #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
13
+ #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
14
+
15
+ inline Slice sl(bool x) {
16
+ if (x)
17
+ return Slice(0, -1);
18
+ else
19
+ return Slice(1, None);
20
+ }
21
+
22
+ torch::Tensor forward_agg_cuda( int level, float norm, const torch::Tensor lower,
23
+ const at::optional<at::Tensor> weights, torch::Tensor upper );
24
+
25
+ std::vector<torch::Tensor> forward_agg( int level, float norm, const torch::Tensor lower,
26
+ const at::optional<at::Tensor> weights = at::nullopt ) {
27
+ TORCH_CHECK(level >= 1, "level must be >= 1");
28
+ TORCH_CHECK(lower.dim() == 4, "input must have 4 dimensions");
29
+ const auto LH1 = lower.size(0);
30
+ const auto LW1 = lower.size(1);
31
+ const auto LH2 = lower.size(2);
32
+ const auto LW2 = lower.size(3);
33
+ if (weights) TORCH_CHECK(weights->size(0) == LH1 && weights->size(1) == LW1, "weights should have shape == lower.shape[:2]");
34
+ const auto UH1 = (level == 1) ? LH1+1 : LH1;
35
+ const auto UW1 = (level == 1) ? LW1+1 : LW1;
36
+
37
+ TORCH_CHECK(lower.is_cuda())
38
+ auto upper = torch::zeros({UH1, UW1, LH2, LW2}, lower.options());
39
+ torch::Tensor new_weights = forward_agg_cuda( level, norm, lower, weights, upper );
40
+ return {upper, new_weights};
41
+ }
42
+
43
+
44
+ torch::Tensor forward_pool_agg_cuda( int level, float norm, const torch::Tensor lower,
45
+ const at::optional<at::Tensor> weights, torch::Tensor upper );
46
+
47
+ std::vector<torch::Tensor> forward_pool_agg( int level, float norm, const torch::Tensor lower,
48
+ const at::optional<at::Tensor> weights = at::nullopt ) {
49
+ TORCH_CHECK(level >= 1, "level must be >= 1");
50
+ TORCH_CHECK(lower.dim() == 4, "input must have 4 dimensions");
51
+ const auto LH1 = lower.size(0);
52
+ const auto LW1 = lower.size(1);
53
+ const auto LH2 = lower.size(2);
54
+ const auto LW2 = lower.size(3);
55
+ if (weights) TORCH_CHECK(weights->size(0) == LH1 && weights->size(1) == LW1, "weights should have shape == lower.shape[:2]");
56
+ const auto UH1 = (level == 1) ? LH1+1 : LH1;
57
+ const auto UW1 = (level == 1) ? LW1+1 : LW1;
58
+
59
+ TORCH_CHECK(lower.is_cuda())
60
+ auto upper = torch::zeros({UH1, UW1, 1+(LH2-1)/2, 1+(LW2-1)/2}, lower.options());
61
+ torch::Tensor new_weights = forward_pool_agg_cuda( level, norm, lower, weights, upper );
62
+ return {upper, new_weights};
63
+ }
64
+
65
+ // forward declaration
66
+ void backward_agg_unpool_cuda( int level, const torch::Tensor upper, torch::Tensor lower, bool exclude_borders );
67
+
68
+ void backward_agg_unpool( int level, const torch::Tensor upper, torch::Tensor lower, bool exclude_borders = true ) {
69
+ TORCH_CHECK(level >= 1, "level must be >= 1");
70
+ TORCH_CHECK( upper.dim() == 4 && lower.dim() == 4, "inputs should be 4-dimensional" );
71
+
72
+ TORCH_CHECK(upper.is_cuda() && lower.is_cuda())
73
+ backward_agg_unpool_cuda(level, upper, lower, exclude_borders);
74
+ }
75
+
76
+
77
+ void max_pool3d_cuda( const torch::Tensor tensor, const int kernel_size, const int stride,
78
+ torch::Tensor maxima, torch::Tensor indices );
79
+
80
+ std::vector<torch::Tensor> max_pool3d( const torch::Tensor tensor, const int kernel_size, const int stride ) {
81
+ TORCH_CHECK(tensor.dim() == 4, "tensor should be 4-dimensional: BxCxHxW");
82
+ TORCH_CHECK( 1 <= kernel_size, "bad kernel size %d", kernel_size );
83
+ TORCH_CHECK( 1 <= stride, "bad stride %d", stride );
84
+ const int IB = tensor.size(0);
85
+ const int IH = tensor.size(2); // input height
86
+ const int IW = tensor.size(3); // input width
87
+
88
+ // output size
89
+ const int OH = 1 + (IH - kernel_size) / stride;
90
+ const int OW = 1 + (IW - kernel_size) / stride;
91
+
92
+ torch::Tensor maxima = torch::empty({IB, OH, OW}, tensor.options());
93
+ torch::Tensor indices = torch::empty({IB, OH, OW}, tensor.options().dtype(torch::kInt64));
94
+
95
+ if (tensor.is_cuda())
96
+ max_pool3d_cuda( tensor, kernel_size, stride, maxima, indices );
97
+ else
98
+ TORCH_CHECK(false, "CPU max_pool3d not implemented yet");
99
+ return {maxima, indices};
100
+ }
101
+
102
+ static inline float ptdot( const float* m, float x, float y ) {
103
+ return x*m[0] + y*m[1] + m[2];
104
+ }
105
+
106
+ static inline float pow2(float v) {
107
+ return v*v;
108
+ }
109
+
110
+ void merge_corres_cpu( const torch::Tensor corres, int offset, const torch::Tensor _inv_rot,
111
+ float dmax, torch::Tensor all_corres, const int all_step ) {
112
+ const int H = corres.size(0);
113
+ const int W = corres.size(1);
114
+ const float tol = 2*2; // squared
115
+ dmax *= dmax; // squared
116
+
117
+ TORCH_CHECK( _inv_rot.is_contiguous() );
118
+ const float* inv_rot = _inv_rot.data_ptr<float>();
119
+
120
+ auto corres_a = corres.accessor<float,3>();
121
+ auto all_corres_a = all_corres.accessor<float,3>();
122
+
123
+ // for each bin of the final histograms, we get the nearest-neighbour bin in corres0 and corres1
124
+ for (int j=0; j<all_corres.size(0); j++)
125
+ for (int i=0; i<all_corres.size(1); i++) {
126
+ // printf("accessing all_corres[%d,%d]", j, i);
127
+ auto all_cor = all_corres_a[j][i];
128
+
129
+ // center of the bin in the reference frame
130
+ float x = i*all_step + all_step/2;
131
+ float y = j*all_step + all_step/2;
132
+ // printf(" -> (%g,%g) in ref img", x, y);
133
+
134
+ // center of the bin on the rescaled+rotated image
135
+ float xr = ptdot( inv_rot + 0, x, y );
136
+ float yr = ptdot( inv_rot + 3, x, y );
137
+ // printf(" -> (%g,%g) in rescaled", xr, yr);
138
+
139
+ // iterate on the nearby bins
140
+ int xb = (int)(0.5+ xr/4); // rescaled+rotated desc always has step 4
141
+ int yb = (int)(0.5+ yr/4);
142
+ // printf(" -> (%d,%d) in bins\n", xb, yb);
143
+
144
+ float best = dmax;
145
+ for (int v = MAX(0,yb-1); v <= MIN(H,yb+1); v++)
146
+ for (int u = MAX(0,xb-1); u <= MIN(W,xb+1); u++) {
147
+ // assert( v >= 0 && v < corres_a.size(0) );
148
+ // assert( u >= 0 && u < corres_a.size(1) );
149
+ auto cor = corres_a[v][u];
150
+ float d = pow2(cor[offset]-x) + pow2(cor[offset+1]-y);
151
+ if( d < best ) best = d;
152
+ }
153
+
154
+ for (int v = MAX(0,yb-1); v <= MIN(H,yb+1); v++)
155
+ for (int u = MAX(0,xb-1); u <= MIN(W,xb+1); u++) {
156
+ // assert( v >= 0 && v < corres_a.size(0) );
157
+ // assert( u >= 0 && u < corres_a.size(1) );
158
+ auto cor = corres_a[v][u];
159
+ float d = pow2(cor[offset]-x) + pow2(cor[offset+1]-y);
160
+ if( d <= tol*best ) { // spatially close
161
+ // merge correspondence if score is better than actual
162
+ // printf("update all_corres[%d,%d]\n", v,u);
163
+ if( cor[4] > all_cor[4] )
164
+ for (int k = 0; k < all_corres.size(2); k++)
165
+ all_cor[k] = cor[k];
166
+ }
167
+ }
168
+ }
169
+ }
170
+
171
+ void merge_corres_cuda( const torch::Tensor corres, int offset, const torch::Tensor inv_rot,
172
+ float dmax, torch::Tensor all_corres, const int all_step );
173
+
174
+ void merge_corres( const torch::Tensor corres, int offset, const torch::Tensor rot,
175
+ torch::Tensor all_corres, const int all_step ) {
176
+ TORCH_CHECK( corres.dim() == 3 && corres.size(2) == 6, "corres.shape should be (H,W,6)" );
177
+ TORCH_CHECK( all_corres.dim() == 3 && all_corres.size(2) == 6, "all_corres.shape should be (H,W,6)" );
178
+
179
+ float dmax = 8 * torch::sqrt(torch::det(rot)).item<float>();
180
+ torch::Tensor inv_rot = torch::inverse(rot).contiguous();
181
+
182
+ if (all_corres.is_cuda())
183
+ merge_corres_cuda( corres, offset, inv_rot, dmax, all_corres, all_step );
184
+ else
185
+ merge_corres_cpu( corres, offset, inv_rot, dmax, all_corres, all_step );
186
+ }
187
+
188
+
189
+ void mask_correlations_radial_cuda( torch::Tensor corr, const torch::Tensor targets,
190
+ const float radius, const float alpha);
191
+
192
+ void mask_correlations_radial( torch::Tensor corr, const torch::Tensor targets,
193
+ const float radius, const float alpha) {
194
+ // radius: protected area in pixels around each target center
195
+ // alpha: in [0,1]. If alpha = 0: no effect. If alpha = 1: full effect.
196
+ TORCH_CHECK( corr.dim() == 4 );
197
+ TORCH_CHECK( targets.dim() == 3 );
198
+ TORCH_CHECK( targets.size(0) == corr.size(0) && targets.size(1) == corr.size(1) && targets.size(2) == 2,
199
+ "correlations and targets should have the same shape[:2]" );
200
+
201
+ if (corr.is_cuda())
202
+ mask_correlations_radial_cuda( corr, targets, radius, alpha );
203
+ else
204
+ TORCH_CHECK(false, "TODO");
205
+ }
206
+
207
+
208
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
209
+ m.def("forward_agg", &forward_agg, "forward aggregation (CUDA)");
210
+ m.def("forward_pool_agg", &forward_pool_agg, "forward pooling and aggregation (CUDA)");
211
+ m.def("backward_agg_unpool", &backward_agg_unpool, "backward sparse-conv and max-unpooling (C++ & CUDA)");
212
+ m.def("max_pool3d", &max_pool3d, "max_pool3d that can handle big inputs (CUDA)");
213
+ m.def("merge_corres_one_side", &merge_corres, "merge correspondences on CPU or GPU" );
214
+ m.def("mask_correlations_radial", &mask_correlations_radial, "mask correlations radially (CUDA)" );
215
+ }
core/cuda_deepm/kernels.cu ADDED
@@ -0,0 +1,578 @@
1
+ // Copyright 2022-present NAVER Corp.
2
+ // CC BY-NC-SA 4.0
3
+ // Available only for non-commercial use
4
+
5
+ #include <torch/extension.h>
6
+ #include <cuda.h>
7
+ #include <cuda_runtime.h>
8
+ #include <vector>
9
+
10
+ #define MIN(x, y) ((x) < (y) ? (x) : (y))
11
+ #define MAX(x, y) ((x) < (y) ? (y) : (x))
12
+ #define inf std::numeric_limits<float>::infinity()
13
+
14
+ #define CHECK_CUDA(tensor) {\
15
+ TORCH_CHECK((tensor).is_cuda(), #tensor " is not in cuda memory"); \
16
+ TORCH_CHECK((tensor).is_contiguous(), #tensor " is not contiguous"); }
17
+ void CHECK_KERNEL() {auto error = cudaGetLastError(); TORCH_CHECK( error == cudaSuccess, cudaGetErrorString(error));}
18
+
19
+
20
+ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 600
21
+ #define atomicMax_block atomicMax
22
+ #endif
23
+
24
+
25
+ template <typename scalar_t>
26
+ __global__ void forward_agg_cuda_kernel(
27
+ const int LH1, const int LW1, const int LH2, const int LW2,
28
+ const int gap_left, const int gap_right, float norm,
29
+ const torch::PackedTensorAccessor64<scalar_t,4,torch::RestrictPtrTraits> lower,
30
+ torch::PackedTensorAccessor64<scalar_t,4,torch::RestrictPtrTraits> upper,
31
+ const float* weights, float* new_weights ) {
32
+
33
+ const auto UH1 = LH1 + bool(!gap_left); // level 0 is smaller than other levels
34
+ const auto UW1 = LW1 + bool(!gap_left);
35
+ const auto UH2 = LH2;
36
+ const auto UW2 = LW2;
37
+
38
+ int64_t idx = blockIdx.x * blockDim.x + threadIdx.x;
39
+ const int uw2 = idx % UW2; idx /= UW2;
40
+ const int uh2 = idx % UH2; idx /= UH2;
41
+ const int uw1 = idx % UW1; idx /= UW1;
42
+ const int uh1 = idx;
43
+ if (uh1 >= UH1) return;
44
+
45
+ // then, add the 4 child
46
+ float sumw = 0, nrm = 0, res = 0;
47
+ // #pragma unroll
48
+ for (int i = 0; i < 4; i++) {
49
+ const int v = i/2, u = i%2;
50
+ // source pixel
51
+ const int lh1 = uh1 + (1-v) * gap_left - v * gap_right;
52
+ if (lh1 < 0 || lh1 >= LH1) continue;
53
+ const int lw1 = uw1 + (1-u) * gap_left - u * gap_right;
54
+ if (lw1 < 0 || lw1 >= LW1) continue;
55
+
56
+ // load weight even if (lh2,lw2) are invalid
57
+ const float weight = weights ? weights[lh1*LW1 + lw1] : 1;
58
+ sumw += weight;
59
+
60
+ const int lh2 = uh2 + 1 - 2*v;
61
+ if (lh2 < 0 || lh2 >= LH2) continue;
62
+ const int lw2 = uw2 + 1 - 2*u;
63
+ if (lw2 < 0 || lw2 >= LW2) continue;
64
+
65
+ res += weight * lower[lh1][lw1][lh2][lw2];
66
+ nrm += weight;
67
+ }
68
+
69
+ // normalize output
70
+ nrm = sumw * (nrm < sumw ? powf(nrm/sumw, norm) : 1);
71
+ upper[uh1][uw1][uh2][uw2] = (nrm ? res / nrm : 0);
72
+ if (uh2 == 1 && uw2 == 1)
73
+ new_weights[uh1*UW1 + uw1] = sumw;
74
+ }
75
+
76
+ torch::Tensor forward_agg_cuda( int level, float norm, const torch::Tensor lower,
77
+ const at::optional<at::Tensor> weights, torch::Tensor upper ) {
78
+ CHECK_CUDA(lower);
79
+ CHECK_CUDA(upper);
80
+ if (weights) CHECK_CUDA(weights.value());
81
+
82
+ const auto UH1 = upper.size(0);
83
+ const auto UW1 = upper.size(1);
84
+ const auto UH2 = upper.size(2);
85
+ const auto UW2 = upper.size(3);
86
+ const auto LH1 = lower.size(0);
87
+ const auto LW1 = lower.size(1);
88
+ const auto LH2 = lower.size(2);
89
+ const auto LW2 = lower.size(3);
90
+ TORCH_CHECK( UH1 == LH1 + int(level==1) && UW1 == LW1 + int(level==1), "inconsistent lower and upper shapes" );
91
+
92
+ const int gap_left = (level >= 2) ? 1 << (level-2) : 0; // 0, 1, 2, 4, ...
93
+ const int gap_right= 1 << MAX(0, level-2); // 1, 1, 2, 4, ...
94
+
95
+ const int MAX_THREADS = 512; // faster than 1024 (higher SM occupancy)
96
+ const int THREADS_PER_BLOCK = MAX_THREADS;
97
+ const int N_BLOCKS = (UH1*UW1*UH2*UW2 + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
98
+
99
+ torch::Tensor new_weights = torch::zeros({UH1, UW1}, upper.options().dtype(torch::kFloat32));
100
+
101
+ // one block for each layer, one thread per local-max
102
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(lower.type(), "forward_agg_cuda", ([&] {
103
+ forward_agg_cuda_kernel<<<N_BLOCKS, THREADS_PER_BLOCK>>>(
104
+ LH1, LW1, LH2, LW2,
105
+ gap_left, gap_right, norm,
106
+ lower.packed_accessor64<scalar_t,4,torch::RestrictPtrTraits>(),
107
+ upper.packed_accessor64<scalar_t,4,torch::RestrictPtrTraits>(),
108
+ weights ? weights->data_ptr<float>() : nullptr, new_weights.data_ptr<float>() );
109
+ }));
110
+ return new_weights;
111
+ }
112
+
113
+ template <typename scalar_t>
114
+ __global__ void forward_pool_agg_cuda_kernel(
115
+ const int LH1, const int LW1, const int LH2, const int LW2,
116
+ // const int UH1, const int UW1, const int UH2, const int UW2,
117
+ const int gap_left, const int gap_right, float norm,
118
+ const torch::PackedTensorAccessor64<scalar_t,4,torch::RestrictPtrTraits> lower,
119
+ torch::PackedTensorAccessor64<scalar_t,4,torch::RestrictPtrTraits> upper,
120
+ const float* weights, float* new_weights ) {
121
+
122
+ const auto UH1 = LH1 + bool(!gap_left); // level 0 is smaller than other levels
123
+ const auto UW1 = LW1 + bool(!gap_left);
124
+ const auto UH2 = (LH2-1)/2 + 1;
125
+ const auto UW2 = (LW2-1)/2 + 1;
126
+
127
+ int idx = blockIdx.x * blockDim.x + threadIdx.x;
128
+ const int uw2 = idx % UW2; idx /= UW2;
129
+ const int uh2 = idx % UH2; idx /= UH2;
130
+ const int uw1 = idx % UW1; idx /= UW1;
131
+ const int uh1 = idx;
132
+ if (uh1 >= UH1) return;
133
+
134
+ // then, add the 4 children
135
+ float sumw = 0, nrm = 0, res = 0;
136
+ // #pragma unroll
137
+ for (int i = 0; i < 4; i++) {
138
+ const int v = i/2, u = i%2;
139
+ // source pixel
140
+ const int lh1 = uh1 + (1-v) * gap_left - v * gap_right;
141
+ if (lh1 < 0 || lh1 >= LH1) continue;
142
+ const int lw1 = uw1 + (1-u) * gap_left - u * gap_right;
143
+ if (lw1 < 0 || lw1 >= LW1) continue;
144
+
145
+ // load weight even if (lh2,lw2) are invalid
146
+ const float weight = weights ? weights[lh1*LW1 + lw1] : 1;
147
+ sumw += weight;
148
+
149
+ const int lh2_ = 2*(uh2 + 1 - 2*v); // position in lower
150
+ const int lw2_ = 2*(uw2 + 1 - 2*u);
151
+ float lower_max = -inf;
152
+ #pragma unroll
153
+ for (int j = -1; j <= 1; j++) {
154
+ const int lh2 = lh2_ + j;
155
+ if (lh2 < 0 || lh2 >= LH2) continue;
156
+ #pragma unroll
157
+ for (int i = -1; i <= 1; i++) {
158
+ const int lw2 = lw2_ + i;
159
+ if (lw2 < 0 || lw2 >= LW2) continue;
160
+ float l = lower[lh1][lw1][lh2][lw2];
161
+ lower_max = MAX(lower_max, l);
162
+ }}
163
+ if (lower_max == -inf) continue;
164
+
165
+ res += weight * lower_max;
166
+ nrm += weight;
167
+ }
168
+
169
+ // normalize output
170
+ nrm = sumw * (nrm < sumw ? powf(nrm/sumw, norm) : 1);
171
+ upper[uh1][uw1][uh2][uw2] = (nrm ? res / nrm : 0);
172
+ if (uh2 == 1 && uw2 == 1)
173
+ new_weights[uh1*UW1 + uw1] = sumw;
174
+ }
175
+
176
+ torch::Tensor forward_pool_agg_cuda( int level, float norm, const torch::Tensor lower,
177
+ const at::optional<at::Tensor> weights, torch::Tensor upper ) {
178
+ CHECK_CUDA(lower);
179
+ CHECK_CUDA(upper);
180
+ if (weights) CHECK_CUDA(weights.value());
181
+
182
+ const auto LH1 = lower.size(0);
183
+ const auto LW1 = lower.size(1);
184
+ const auto LH2 = lower.size(2);
185
+ const auto LW2 = lower.size(3);
186
+ const auto UH1 = upper.size(0);
187
+ const auto UW1 = upper.size(1);
188
+ const auto UH2 = upper.size(2);
189
+ const auto UW2 = upper.size(3);
190
+ TORCH_CHECK( UH1 == LH1 + int(level==1) && UW1 == LW1 + int(level==1), "inconsistent lower and upper shapes" );
191
+ TORCH_CHECK( UH2 == (LH2-1)/2+1 && UW2 == (LW2-1)/2+1, "lower level should be twice as big" );
192
+
193
+ const int gap_left = (level >= 2) ? 1 << (level-2) : 0; // 0, 1, 2, 4, ...
194
+ const int gap_right= 1 << MAX(0, level-2); // 1, 1, 2, 4, ...
195
+
196
+ const int MAX_THREADS = 512; // faster than 1024 (higher SM occupancy)
197
+ const int THREADS_PER_BLOCK = MAX_THREADS;
198
+ const int N_BLOCKS = (UH1*UW1*UH2*UW2 + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
199
+
200
+ torch::Tensor new_weights = torch::zeros({UH1, UW1}, upper.options().dtype(torch::kFloat));
201
+
202
+ // one thread per output cell of the upper correlation volume
203
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(lower.type(), "forward_pool_agg_cuda", ([&] {
204
+ forward_pool_agg_cuda_kernel<<<N_BLOCKS, THREADS_PER_BLOCK>>>(
205
+ LH1, LW1, LH2, LW2,
206
+ // UH1, UW1, UH2, UW2,
207
+ gap_left, gap_right, norm,
208
+ lower.packed_accessor64<scalar_t,4,torch::RestrictPtrTraits>(),
209
+ upper.packed_accessor64<scalar_t,4,torch::RestrictPtrTraits>(),
210
+ weights ? weights->data_ptr<float>() : nullptr, new_weights.data_ptr<float>() );
211
+ }));
212
+ return new_weights;
213
+ }
214
+
215
+ __device__ inline int in(int lower, int var, int upper) {
216
+ return lower <= var && var < upper;
217
+ }
218
+ __device__ inline int sl(bool b) {
219
+ return b ? 1 : -1;
220
+ }
221
+
222
+ __device__ short atomicMaxShort(short* address, short val) {
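+ // CUDA has no native 16-bit atomicMax, so it is emulated with a CAS loop on the aligned
+ // 32-bit word containing `address`: the relevant half-word is extracted with __byte_perm,
+ // max'ed with `val`, spliced back, and atomicCAS retries until no other thread interfered.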
223
+ unsigned int *base_address = (unsigned int *)((size_t)address & ~3); // multiple of 4
224
+
225
+ unsigned int order_from[] = {0x0010, 0x0032}; // either bytes[0:2] or bytes[2:4]
226
+ unsigned int from = order_from[((size_t)address & 3) / 2];
227
+
228
+ unsigned int order_back[] = {0x3254, 0x5410}; // right-to-left
229
+ unsigned int back = order_back[((size_t)address & 3) / 2];
230
+ unsigned int old, assumed, max_, new_;
231
+
232
+ old = *base_address;
233
+ do {
234
+ assumed = old;
235
+ max_ = max(val, (short)__byte_perm(old, 0, from)); // extract word
236
+ new_ = __byte_perm(old, max_, back); // replace word
237
+ old = atomicCAS(base_address, assumed, new_);
238
+ } while (assumed != old);
239
+ return old;
240
+ }
241
+
242
+ template <typename scalar_t>
243
+ __device__ inline void TplAtomicMax_block( scalar_t* before, scalar_t after ) { assert(!"atomicMax not implemented for this dtype"); }
244
+ template <>
245
+ __device__ inline void TplAtomicMax_block( at::Half* before, at::Half after ) { atomicMaxShort( (int16_t*)before, *(int16_t*)&after ); }
246
+ template <>
247
+ __device__ inline void TplAtomicMax_block( float* before, float after ) { atomicMax_block( (int32_t*)before, *(int32_t*)&after ); }
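+ // note: comparing float bit patterns as signed integers preserves ordering only for
+ // non-negative values, which the aggregated scores are assumed to be here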
248
+
249
+ template <typename scalar_t>
250
+ __global__ void backward_agg_unpool_cuda_kernel(
251
+ const int UH1, const int UW1,
252
+ const int UH2, const int UW2,
253
+ const int LH2, const int LW2,
254
+ const int gap_left, const int gap_right,
255
+ const torch::PackedTensorAccessor64<scalar_t,4,torch::RestrictPtrTraits> upper,
256
+ torch::PackedTensorAccessor64<scalar_t,4,torch::RestrictPtrTraits> lower ) {
257
+
258
+ /* Each block is going to take care of a single layer, i.e. lower[:,:,0::2,0::2].
259
+ the first thread is allocating some global memory and then frees it later.
260
+ */
261
+ // const int LH1 = gridDim.x;
262
+ // const int LW1 = gridDim.y;
263
+ const int lh1 = blockIdx.y;
264
+ const int lw1 = blockIdx.x;
265
+ const int UHW2 = UH2 * UW2; // upper layer size
266
+
267
+ __shared__ float* _shared_addr;
268
+ if (threadIdx.x == 0)
269
+ do{ _shared_addr = new float [2*UHW2]; } // for each upper place, we have (best, bestp)
270
+ while(!_shared_addr); // waiting for memory to be available...
271
+ __syncthreads();
272
+
273
+ float * layer_best = _shared_addr;
274
+ int * layer_bestp = (int*)(_shared_addr+1); // interleaved with layer_best: (best, bestp) pairs
275
+ assert( layer_best );
276
+
277
+ /* First pass: we recover the position and values of all local maxima in the layer
278
+ */
279
+ for (int idx = threadIdx.x; idx < UHW2; idx += blockDim.x) {
280
+ const int ux = idx % UW2;
281
+ const int uy = idx / UW2;
282
+ const int lx = 2*ux; // lower pos from upper pos
283
+ const int ly = 2*uy;
284
+
285
+ // find the local maximum in the 3x3 neighborhood
286
+ float best = -inf;
287
+ int bestp = 0;
288
+ #pragma unroll
289
+ for (int j_= -1; j_<= 1; j_++) {
290
+ const int j = ly + j_;
291
+ if (j < 0 || j >= LH2) continue;
292
+ #pragma unroll
293
+ for (int i_= -1; i_<= 1; i_++) {
294
+ const int i = lx + i_;
295
+ if (i < 0 || i >= LW2) continue;
296
+ float cur = lower[lh1][lw1][j][i];
297
+ if (cur > best) { best = cur; bestp = j*LW2+i; }
298
+ }}
299
+ layer_best[2*idx] = best;
300
+ layer_bestp[2*idx] = bestp;
301
+ }
302
+
303
+ __syncthreads();
304
+
305
+ /* Second pass: we update the local maxima according to the upper layer
306
+ */
307
+ for (int idx = threadIdx.x; idx < UHW2; idx += blockDim.x) {
308
+ const int ux = idx % UW2;
309
+ const int uy = idx / UW2;
310
+
311
+ // max-pool the additional value from the upper layer
312
+ scalar_t add = 0;
313
+ for (int v = -gap_left; v <= gap_right; v += gap_right+gap_left) {
314
+ for (int u = -gap_left; u <= gap_right; u += gap_right+gap_left) {
315
+ const int uh1 = lh1 + v, uw1 = lw1 + u;
316
+ const int uh2 = uy+sl(v>0), uw2 = ux+sl(u>0);
317
+ if (in(0, uh1, UH1) && in(0, uw1, UW1) && in(0, uh2, UH2) && in(0, uw2, UW2))
318
+ add = MAX(add, upper[uh1][uw1][uh2][uw2]);
319
+ }}
320
+
321
+ // grab local maxima
322
+ float best = layer_best[2*idx];
323
+ int bestp = layer_bestp[2*idx];
324
+ const int lx = bestp % LW2;
325
+ const int ly = bestp / LW2;
326
+
327
+ // printf("UH=%d,UW=%d: uy=%d,ux=%d --> best=%g at ly=%d,lx=%d\n", UH,UW, uy,ux, best, ly,lx);
328
+ scalar_t* before = & lower[lh1][lw1][ly][lx];
329
+ scalar_t after = best + add;
330
+ TplAtomicMax_block<scalar_t>( before, after );
331
+ }
332
+
333
+ __syncthreads();
334
+
335
+ if (threadIdx.x == 0)
336
+ delete[] _shared_addr; // allocated with new[]
337
+ }
338
+
339
+ void backward_agg_unpool_cuda( int level, const torch::Tensor upper, torch::Tensor lower, bool exclude_borders ) {
340
+ CHECK_CUDA(lower);
341
+ CHECK_CUDA(upper);
342
+
343
+ const auto UH1 = upper.size(0);
344
+ const auto UW1 = upper.size(1);
345
+ const auto UH2 = upper.size(2);
346
+ const auto UW2 = upper.size(3);
347
+ const auto LH1 = lower.size(0);
348
+ const auto LW1 = lower.size(1);
349
+ const auto LH2 = lower.size(2);
350
+ const auto LW2 = lower.size(3);
351
+ TORCH_CHECK( UH1 == LH1 + int(level==1) && UW1 == LW1 + int(level==1), "inconsistent lower and upper shapes" );
352
+ const int xb = exclude_borders; // local_argmax cannot reach the bottom and right borders
353
+
354
+ const int gap_left = (level >= 2) ? 1 << (level-2) : 0; // 0, 1, 2, 4, ...
355
+ const int gap_right= 1 << MAX(0, level-2); // 1, 1, 2, 4, ...
356
+
357
+ const int64_t MAX_THREADS = 1024;
358
+ const int64_t THREADS_PER_LAYER = MIN(UH2*UW2, MAX_THREADS);
359
+
360
+ // one block for each layer, one thread per local-max
361
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(upper.type(), "backward_agg_unpool_cuda", ([&] {
362
+ backward_agg_unpool_cuda_kernel<<<dim3(LW1,LH1), THREADS_PER_LAYER>>>(
363
+ UH1, UW1, UH2, UW2, LH2-xb, LW2-xb,
364
+ gap_left, gap_right,
365
+ upper.packed_accessor64<scalar_t,4,torch::RestrictPtrTraits>(),
366
+ lower.packed_accessor64<scalar_t,4,torch::RestrictPtrTraits>());
367
+ }));
368
+ CHECK_KERNEL();
369
+ }
370
+
371
+ template <typename scalar_t>
372
+ __global__ void max_pool3d_cuda_kernel(
373
+ const int BS, const int NC, const int IH, const int IW, const int OH, const int OW,
374
+ const int ks, const int stride,
375
+ const torch::PackedTensorAccessor64<scalar_t,4,torch::RestrictPtrTraits> tensor,
376
+ torch::PackedTensorAccessor64<scalar_t,3,torch::RestrictPtrTraits> maxima,
377
+ torch::PackedTensorAccessor64<int64_t, 3,torch::RestrictPtrTraits> indices ) {
378
+
379
+ // each thread takes care of one output
380
+ int64_t idx = blockIdx.x * blockDim.x + threadIdx.x;
381
+ const int x = idx % OW; idx /= OW;
382
+ const int y = idx % OH; idx /= OH;
383
+ const int b = idx;
384
+ if (b >= BS) return;
385
+
386
+ float best = -inf;
387
+ int64_t best_pos = 0;
388
+ for (int64_t c = 0; c < NC; c++) {
389
+ for (int j = stride*y; j < stride*y+ks; j++) {
390
+ for (int i = stride*x; i < stride*x+ks; i++) {
391
+ // assert( b < BS and c < NC and j < IH and i < IW );
392
+ float cur = tensor[b][c][j][i];
393
+ if (cur > best) {best = cur; best_pos = (c*IH + j)*IW+ i; }
394
+ }}}
395
+
396
+ // assert( b < BS and y < OH and x < OW );
397
+ maxima [b][y][x] = best;
398
+ indices[b][y][x] = best_pos;
399
+ }
400
+
401
+ void max_pool3d_cuda( const torch::Tensor tensor, const int kernel_size, const int stride,
402
+ torch::Tensor maxima, torch::Tensor indices ) {
403
+ CHECK_CUDA(tensor);
404
+ TORCH_CHECK(tensor.dim() == 4, "tensor should be 4-dimensional: BxCxHxW");
405
+ const int BS = tensor.size(0);
406
+ const int NC = tensor.size(1);
407
+ const int IH = tensor.size(2); // input height
408
+ const int IW = tensor.size(3); // input width
409
+
410
+ // output size
411
+ TORCH_CHECK( maxima.sizes() == indices.sizes(), "maxima and indices should have the same shape" );
412
+ TORCH_CHECK( BS == maxima.size(0), "bad batch size" );
413
+ const int OH = maxima.size(1);
414
+ const int OW = maxima.size(2);
415
+
416
+ const int64_t THREADS_PER_LAYER = 512;
417
+ const int64_t N_BLOCKS = (BS*OH*OW + THREADS_PER_LAYER-1) / THREADS_PER_LAYER;
418
+
419
+ // one thread per output element
420
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(tensor.type(), "max_pool3d_cuda", ([&] {
421
+ max_pool3d_cuda_kernel<<<N_BLOCKS, THREADS_PER_LAYER>>>(
422
+ BS, NC, IH, IW, OH, OW, kernel_size, stride,
423
+ tensor. packed_accessor64<scalar_t,4,torch::RestrictPtrTraits>(),
424
+ maxima. packed_accessor64<scalar_t,3,torch::RestrictPtrTraits>(),
425
+ indices.packed_accessor64<int64_t,3,torch::RestrictPtrTraits>());
426
+ }));
427
+ }
428
+
429
+
430
+ __device__ inline float ptdot( const float* m, float x, float y ) {
431
+ return x*m[0] + y*m[1] + m[2];
432
+ }
433
+
434
+ __device__ inline float sqr(float v) {
435
+ return v*v;
436
+ }
437
+
438
+
439
+ __global__ void merge_corres_cuda_kernel(
440
+ const int OH, const int OW, const int OZ, const int IH, const int IW,
441
+ const float dmax2, int offset, const float* inv_rot, const int all_step,
442
+ const torch::PackedTensorAccessor32<float,3,torch::RestrictPtrTraits> corres_a,
443
+ torch::PackedTensorAccessor32<float,3,torch::RestrictPtrTraits> all_corres_a ) {
444
+
445
+ // each thread takes care of one output
446
+ int64_t idx = blockIdx.x * blockDim.x + threadIdx.x;
447
+ const int i = idx % OW; idx /= OW;
448
+ const int j = idx;
449
+ if (j >= OH) return;
450
+
451
+ const float tol2 = 2*2; // squared
452
+ auto all_cor = all_corres_a[j][i];
453
+
454
+ // center of the bin in the reference frame
455
+ float x = i*all_step + all_step/2;
456
+ float y = j*all_step + all_step/2;
457
+
458
+ // center of the bin on the rescaled+rotated image
459
+ float xr = ptdot( inv_rot + 0, x, y );
460
+ float yr = ptdot( inv_rot + 3, x, y );
461
+
462
+ // iterate on the nearby bins
463
+ int xb = (int)(0.5+ xr/4); // rescaled+rotated desc always has step 4
464
+ int yb = (int)(0.5+ yr/4);
465
+
466
+ float best = dmax2;
467
+ #pragma unroll
468
+ for (int _v = -1; _v <= 1; _v++) {
469
+ #pragma unroll
470
+ for (int _u = -1; _u <= 1; _u++) {
471
+ const int v = yb+_v, u = xb+_u;
472
+ if (!(in(0, v, IH) && in(0, u, IW))) continue;
473
+ auto cor = corres_a[v][u];
474
+ float d = sqr(cor[offset]-x) + sqr(cor[offset+1]-y);
475
+ if (d < best) best = d;
476
+ }}
477
+
478
+ #pragma unroll
479
+ for (int _v = -1; _v <= 1; _v++) {
480
+ #pragma unroll
481
+ for (int _u = -1; _u <= 1; _u++) {
482
+ const int v = yb+_v, u = xb+_u;
483
+ if (!(in(0, v, IH) && in(0, u, IW))) continue;
484
+ auto cor = corres_a[v][u];
485
+ float d = sqr(cor[offset]-x) + sqr(cor[offset+1]-y);
486
+ if (d <= tol2*best) { // spatially close
487
+ // merge correspondence if score is better than actual
488
+ if (cor[4] > all_cor[4])
489
+ for (int k = 0; k < OZ; k++) all_cor[k] = cor[k];
490
+ }
491
+ }}
492
+ }
493
+
494
+ void merge_corres_cuda( const torch::Tensor corres, const int offset, const torch::Tensor _inv_rot,
495
+ const float dmax, torch::Tensor all_corres, const int all_step ) {
496
+ CHECK_CUDA( corres );
497
+ CHECK_CUDA( all_corres );
498
+ CHECK_CUDA( _inv_rot );
499
+ TORCH_CHECK(_inv_rot.is_contiguous(), "inv_rot should be contiguous" );
500
+
501
+ const int IH = corres.size(0);
502
+ const int IW = corres.size(1);
503
+ const int IZ = corres.size(2);
504
+ const int OH = all_corres.size(0);
505
+ const int OW = all_corres.size(1);
506
+ const int OZ = all_corres.size(2);
507
+ TORCH_CHECK( IZ == OZ, "corres and all_corres should have the same shape[2]" );
508
+
509
+ const int THREADS_PER_LAYER = 512;
510
+ const int N_BLOCKS = (OH * OW + THREADS_PER_LAYER-1) / THREADS_PER_LAYER;
511
+
512
+ merge_corres_cuda_kernel<<<N_BLOCKS, THREADS_PER_LAYER>>>(
513
+ OH, OW, OZ, IH, IW, dmax*dmax, offset, _inv_rot.data_ptr<float>(), all_step,
514
+ corres.packed_accessor32<float,3,torch::RestrictPtrTraits>(),
515
+ all_corres.packed_accessor32<float,3,torch::RestrictPtrTraits>());
516
+ CHECK_KERNEL();
517
+ }
518
+
519
+
520
+ template <typename scalar_t>
521
+ __global__ void mask_correlations_radial_cuda_kernel(
522
+ float radius, const float alpha,
523
+ const torch::PackedTensorAccessor32<float,3,torch::RestrictPtrTraits> targets,
524
+ torch::PackedTensorAccessor64<scalar_t,4,torch::RestrictPtrTraits> corr ) {
525
+
526
+ #define H1 ((int)corr.size(0))
527
+ #define W1 ((int)corr.size(1))
528
+ #define H2 ((int)corr.size(2))
529
+ #define W2 ((int)corr.size(3))
530
+
531
+ // each block takes care of one layer corr[j,i,:,:]
532
+ const int j = blockIdx.x / W1;
533
+ const int i = blockIdx.x % W1;
534
+ if (j >= H1) return;
535
+
536
+ // read the target center
537
+ const float cx = targets[j][i][0];
538
+ const float cy = targets[j][i][1];
539
+ if (cx != cx || cy != cy) return; // undefined center
540
+ radius *= radius; // squared
541
+ const float alpha_out = (alpha > 1 ? 1 : alpha);
542
+ const float alpha_in = (alpha < 1 ? 1 : alpha);
543
+
544
+ for (int idx = threadIdx.x; idx < H2*W2; idx += blockDim.x) {
545
+ const int v = idx / W2;
546
+ const int u = idx % W2;
547
+
548
+ // compute weighting
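+ // cells inside `radius` are scaled by alpha_in; outside, the factor decays smoothly
+ // from 1 at the radius towards (1 - alpha_out) as the squared distance grows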
549
+ float dis2 = sqr(u - cx) + sqr(v - cy);
550
+ float mul = alpha_in;
551
+ if (dis2 > radius)
552
+ mul = 1 - alpha_out*(1 - radius / dis2);
553
+
554
+ corr[j][i][v][u] *= mul;
555
+ }
556
+ }
557
+
558
+ void mask_correlations_radial_cuda( torch::Tensor corr, const torch::Tensor targets,
559
+ const float radius, const float alpha) {
560
+ CHECK_CUDA( corr );
561
+ CHECK_CUDA( targets );
562
+
563
+ const int THREADS_PER_LAYER = 512;
564
+ const int N_BLOCKS = H1*W1;
565
+
566
+ #undef H1
567
+ #undef W1
568
+ #undef H2
569
+ #undef W2
570
+
571
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(corr.type(), "mask_correlations_radial_cuda", ([&] {
572
+ mask_correlations_radial_cuda_kernel<<<N_BLOCKS, THREADS_PER_LAYER>>>(
573
+ radius, alpha,
574
+ targets.packed_accessor32<float,3,torch::RestrictPtrTraits>(),
575
+ corr.packed_accessor64<scalar_t,4,torch::RestrictPtrTraits>());
576
+ }));
577
+ CHECK_KERNEL();
578
+ }
core/cuda_deepm/setup.py ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from setuptools import setup
6
+ from torch import cuda
7
+ from torch.utils.cpp_extension import BuildExtension, CUDAExtension
8
+
9
+ # if you want to compile for all possible CUDA architectures
10
+ all_cuda_archs = [] #cuda.get_gencode_flags().replace('compute=','arch=').split()
11
+
12
+ setup(
13
+ name='cuda_deepm',
14
+ ext_modules = [
15
+ CUDAExtension(
16
+ name = 'cuda_deepm',
17
+ sources = ["func.cpp", "kernels.cu"],
18
+ extra_compile_args = dict(nvcc=['-O2']+all_cuda_archs, cxx=['-O2'])
19
+ )
20
+ ],
21
+ cmdclass = {
22
+ 'build_ext': BuildExtension
23
+ })
24
+
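+ # Build note (sketch, not enforced by this script): the extension is typically compiled from
+ # this directory with `python setup.py build_ext --inplace` or installed with `pip install .`,
+ # using a CUDA toolkit compatible with the installed PyTorch build.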
core/functional.py ADDED
@@ -0,0 +1,440 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn.functional as F
9
+
10
+
11
+ def affmul( aff, vecs ):
12
+ """ affine multiplication:
13
+ computes aff @ vecs.T """
14
+ if aff is None: return vecs
15
+ if isinstance(aff, (tuple,list)) or aff.ndim==3:
16
+ assert len(aff) == 2
17
+ assert 4 <= vecs.shape[-1], bb()
18
+ vecs = vecs.clone() if isinstance(vecs, torch.Tensor) else vecs.copy()
19
+ vecs[...,0:2] = affmul(aff[0], vecs[...,0:2])
20
+ vecs[...,2:4] = affmul(aff[1], vecs[...,2:4])
21
+ return vecs
22
+ else:
23
+ assert vecs.shape[-1] == 2, bb()
24
+ assert aff.shape == (2,3) or (aff.shape==(3,3) and
25
+ aff[2,0] == aff[2,1] == 0 and aff[2,2] == 1), bb()
26
+ return (vecs @ aff[:2,:2].T) + aff[:2,2]
27
+
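+ # Example sketch (illustrative values): with a 2x3 affine A = [[1, 0, 10], [0, 1, 5]],
+ # affmul(torch.tensor(A, dtype=torch.float32), torch.tensor([[0., 0.], [2., 3.]]))
+ # returns [[10., 5.], [12., 8.]], i.e. each (x, y) row is mapped through A.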
28
+
29
+ def imresize( img, max_size, mode='area' ):
30
+ # trf: cur_pix --> old_pix
31
+ img, trf = img if isinstance(img,tuple) else (img, torch.eye(3,device=img.device))
32
+
33
+ shape = img.shape[-2:]
34
+ if max_size > 0 and max(shape) > max_size:
35
+ new_shape = tuple(i * max_size // max(shape) for i in shape)
36
+ img = F.interpolate( img[None].float(), size=new_shape, mode=mode )[0]
37
+ img.clamp_(min=0, max=255)
38
+ sca = torch.diag(torch.tensor((shape[0]/new_shape[0],shape[1]/new_shape[1],1), device=img.device))
39
+ img = img.byte()
40
+ trf = trf @ sca # undo sca first
41
+
42
+ return img, trf
43
+
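+ # Shape sketch (illustrative values): a 3x480x640 uint8 image with max_size=320 comes back
+ # as 3x240x320, and trf becomes diag(2, 2, 1), mapping a resized pixel back to the original one.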
44
+
45
+ def rotate_img( img, angle, crop=False ):
46
+ if angle in (0, 90, 180, 270):
47
+ return rotate_img_90(img,angle)
48
+
49
+ img, trf = img
50
+ assert trf.shape == (3,3)
51
+
52
+ def centered_rotation(rotation, shape, **device):
53
+ # rotation matrix
54
+ # pt_in_original_image = rot * pt_in_rotated_image
55
+ angle = rotation * np.pi / 180
56
+ c, s = np.cos(angle), np.sin(angle)
57
+ rot = torch.tensor([(c, -s, 0), (s, c, 0), (0, 0, 1)], dtype=torch.float32, **device)
58
+
59
+ # determine center of rotation before
60
+ H, W = shape
61
+ c_before = torch.tensor((W,H), **device) / 2
62
+ if crop:
63
+ c_after = c_before
64
+ rot_size = (W,H)
65
+ else:
66
+ # enlarge image to fit everything
67
+ corners = torch.tensor([(0, W, W, 0), (0, 0, H, H)], dtype=torch.float32, **device)
68
+ corners = affmul(rot, corners.T).T
69
+ rot_size = (corners.max(dim=1).values - corners.min(dim=1).values + 0.5).int()
70
+ rot_size = (rot_size // 4) * 4 # legacy
71
+ c_after = rot_size / 2
72
+
73
+ rot[:2,2] = c_before - affmul(rot, c_after) # fix translation
74
+ return rot, tuple(rot_size)[::-1]
75
+
76
+ C, H, W = img.shape
77
+ rot, (OH, OW) = centered_rotation(angle, (H,W), device=img.device)
78
+
79
+ # pt_in_original_image = rot * pt_in_rotated_image
80
+ # but pytorch works in [-1,1] coordinates... annoying
81
+ # pt_in_original_1_1 = orig_px_to_1_1 * rot * rotated_1_1_to_px * pt_in_rotated_1_1
82
+ _1_1_to_px = lambda W,H: torch.tensor(((W/2, 0, W/2), (0, H/2, H/2), (0, 0, 1)), device=img.device)
83
+ theta = torch.inverse(_1_1_to_px(W-1,H-1)) @ rot @ _1_1_to_px(OW-1,OH-1)
84
+
85
+ grid = F.affine_grid(theta[None,:2], (1, C, OH, OW), align_corners=True)
86
+ res = F.grid_sample(img[None].float(), grid, align_corners=True).to(dtype=img.dtype)[0]
87
+ return res, trf @ rot
88
+
89
+
90
+
91
+ def rotate_img_90( img, angle ):
92
+ """ Rotate an image by a multiple of 90 degrees using simple transpose and flip ops.
93
+ img = tuple( image, existing_trf )
94
+ existing_trf: current --> old
95
+ """
96
+ angle = angle % 360
97
+ assert angle in (0, 90, 180, 270), 'cannot handle rotation other than multiple of 90 degrees'
98
+ img, trf = img
99
+ assert trf.shape == (3,3)
100
+
101
+ if isinstance(img, np.ndarray):
102
+ assert img.ndim == 3 and 1 <= img.shape[2] <= 3
103
+ new, x, y = np.float32, 1, 0
104
+ flip = lambda i,d: np.flip(i,axis=d)
105
+ elif isinstance(img, torch.Tensor):
106
+ assert img.ndim == 3 and 1 <= img.shape[0] <= 3
107
+ new, x, y = trf.new, -1, -2
108
+ flip = lambda i,d: i.flip(dims=[d])
109
+ H, W = img.shape[y], img.shape[x]
110
+
111
+ if angle == 90:
112
+ # point 0,0 --> (0, H-1); W-1,0 --> 0,0
113
+ img = flip(img.swapaxes(x,y),y)
114
+ trf = trf @ new([[0,-1,W-1],[1,0,0],[0,0,1]]) # inverse transform: new --> current
115
+ if angle == 180:
116
+ # point 0,0 --> (W-1, H-1)
117
+ img = flip(flip(img,x),y)
118
+ trf = trf @ new([[-1,0,W-1],[0,-1,H-1],[0,0,1]]) # inverse transform: new --> current
119
+ if angle == 270:
120
+ # point 0,0 --> (H-1, 0); 0,H-1 --> 0,0
121
+ img = flip(img.swapaxes(x,y),x)
122
+ trf = trf @ new([[0,1,0],[-1,0,H-1],[0,0,1]]) # inverse transform: new --> current
123
+ return img, trf
124
+
125
+
126
+ def encode_scale_rot(scale, rot):
127
+ s = np.int32(np.rint(np.log(scale) / (0.5*np.log(2))))
128
+ r = np.int32(np.rint(((-rot) % 360) / 45)) % 8
129
+ return 8*s + (r%8)
130
+
131
+ def decode_scale_rot( code ):
132
+ s = code // 8
133
+ r = (code % 8)
134
+ return 2 ** (s/2), -((45 * r + 180) % 360 - 180)
135
+
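+ # Worked sketch: encode_scale_rot(2.0, 90) -> s=2 (half-octave scale steps), r=6 (45-degree steps),
+ # hence code 8*2 + 6 = 22; decode_scale_rot(22) recovers (2.0, 90.0).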
136
+
137
+ def normalized_corr(patches, img, padding='ncc', extra_patch=False, ret_norms=False):
138
+ assert patches.ndim == 4, 'patches shape must be (H*W, C, K, K)'
139
+ P, C, K, K = patches.shape
140
+ assert img.ndim == 3 and img.shape[0] == C, 'img shape must be (C, W, H)'
141
+ eps = torch.finfo(patches.dtype).tiny
142
+
143
+ # normalize on patches side
144
+ norms = patches.view(P,-1).norm(dim=-1)
145
+ patches = patches / norms[:,None,None,None].clamp(min=eps)
146
+
147
+ # convolve normalized patches on unnormalized image
148
+ ninth = 0
149
+ if padding == 'ninth':
150
+ ninth = img[:,-1].mean() # ninth dimension
151
+ img = F.pad(img[None], (K//2,K//2)*2, mode='constant', value=ninth)[0]
152
+
153
+ corr = F.conv2d(img[None], patches, padding=0, bias=None)[0]
154
+
155
+ # normalize on img's side
156
+ ones = patches.new_ones((1, C, K, K))
157
+ local_norm = torch.sqrt(F.conv2d(img[None]**2, ones))[0]
158
+ corr /= local_norm
159
+
160
+ # normalize on patches' side (image borders)
161
+ if padding == 'ncc':
162
+ local_norm = torch.sqrt(F.conv2d(ones, patches**2, padding=2))[0]
163
+ local_norm.clamp_(min=eps)
164
+ for j in range(-2, 3):
165
+ for i in range(-2,3):
166
+ if i == j == 2: continue # normal case is already normalized
167
+ if i == 2: i = slice(2,-2)
168
+ if j == 2: j = slice(2,-2)
169
+ corr[:,j,i] /= local_norm[:,j,i]
170
+
171
+ return (corr, norms) if ret_norms else corr
172
+
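+ # Shape sketch (illustrative values):
+ #   patches = torch.randn(16, 3, 5, 5)              # 16 patches of 5x5 over 3 channels
+ #   img     = torch.randn(3, 64, 64)
+ #   normalized_corr(patches, img, padding='ninth')  # -> (16, 64, 64), one map per patch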
173
+
174
+ def true_corr_shape( corr_shape, level ):
175
+ H1, W1, H2, W2 = corr_shape[-4:]
176
+ if level > 0: # recover true size
177
+ H1, W1 = H1-1, W1-1
178
+ return corr_shape[:-4] + (H1, W1, H2, W2)
179
+
180
+ def children(level, H1, W1, H2, W2):
181
+ """ level: parent level (> 1) """
182
+ gap = 2**(level-2)
183
+ # @ level 1: gap=0.5 (parent at x=1 has children at x=[0.5, 1.5])
184
+ # @ level 2: gap=1 (parent at x=1 has children at x=[0, 2])
185
+ # @ level 3: gap=2 (parent at x=2 has children at x=[0, 4])
186
+ # etc.
187
+
188
+ def ravel_child(x, y):
189
+ # x,y is the center of the child patch
190
+ inside = (0 <= x <= W1) and (0 <= y <= H1)
191
+ if gap < 1:
192
+ assert x % 1 == y % 1 == 0.5, bb()
193
+ return int((x-0.5) + (y-0.5) * W1) if inside else -1
194
+ else:
195
+ assert x % 1 == y % 1 == 0, bb()
196
+ return int(x + y * (W1+1)) if inside else -1
197
+
198
+ # 4 children for each parent patch (top-left, top-right, bot-left, bot-right, -1 = None)
199
+ parents = []
200
+ for h in range(H1+1):
201
+ for w in range(W1+1):
202
+ # enumerate the 4 children for this patch
203
+ children = [ravel_child(w + gap*tx, h + gap*ty) for ty in (-1,1) for tx in (-1,1)]
204
+ parents.append(children)
205
+
206
+ return torch.tensor(parents, dtype=torch.int64)
207
+
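+ # Worked sketch (level=1, H1=W1=2, children indexed on a 2x2 grid): the parent centered at
+ # (w=1, h=1) gets children [0, 1, 2, 3]; the corner parent (w=0, h=0) only keeps its
+ # bottom-right child and gets [-1, -1, -1, 0].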
208
+
209
+ def sparse_conv(level, corr, weights=None, reverse=False, norm=0.9):
210
+ H1, W1, H2, W2 = true_corr_shape(corr.shape, level-1 + reverse)
211
+ parents = children(level, H1, W1, H2, W2).to(corr.device)
212
+ n_parents = len(parents)
213
+
214
+ # perform the sparse convolution 'manually'
215
+ # since sparse convolutions are not implemented in pytorch currently
216
+ corr = corr.view(-1, *corr.shape[-2:])
217
+ if not reverse:
218
+ res = corr.new_zeros((n_parents+1,)+corr.shape[-2:]) # last one = garbage channel
219
+ nrm = corr.new_full((n_parents+1,3,3), 1e-8)
220
+ ones = nrm.new_ones((len(corr),1,1))
221
+ ex = 1
222
+ if weights is not None:
223
+ weights = weights.view(len(corr),1,1)
224
+ corr *= weights # apply weights to correlation maps without increasing memory footprint
225
+ ones *= weights
226
+ else:
227
+ assert corr._base is not None and corr._base.shape[0] == n_parents+1
228
+ corr._base[-1] = 0 # reset garbage layer
229
+ ex = 1 if level > 1 else 0
230
+ n_children = (H1+ex) * (W1+ex)
231
+ res = corr.new_zeros((n_children,)+corr.shape[-2:])
232
+
233
+ sl = lambda v: slice(0,-1 or None) if v < 0 else slice(1,None)
234
+ c = 0
235
+ for y in (-1, 1):
236
+ for x in (-1, 1):
237
+ src_layers = parents[:,c]; c+= 1
238
+ # we want to do: res += corr[src_layers] (for all children != -1)
239
+ # but we only have 'res.index_add_()' <==> res[tgt_layers] += corr
240
+ tgt_layers = inverse_mapping(src_layers, max_elem=len(corr), default=n_parents)[:-1]
241
+
242
+ if not reverse:
243
+ # All of corr's channels MUST be utilized. for level>1, this doesn't hold,
244
+ # so we'll send them to a garbage channel ==> res[n_parents]
245
+ sel = good_slice( tgt_layers < n_parents )
246
+
247
+ res[:,sl(-y),sl(-x)].index_add_(0, tgt_layers[sel], corr[sel,sl(y),sl(x)])
248
+ nrm[:,sl(-y),sl(-x)].index_add_(0, tgt_layers[sel], ones[sel].expand(-1,2,2))
249
+ else:
250
+ ''' parent=199=11*17+12 @ (x=48, y=44) at level=1
251
+ |-- child=171 @ (x=46,y=42) at level0
252
+ |-- child=172 @ (x=50,y=42) at level0
253
+ |-- child=187 @ (x=46,y=46) at level0
254
+ |-- child=188 @ (x=50,y=46) at level0
255
+ '''
256
+ out = res[:,sl(y),sl(x)]
257
+ sel = tgt_layers[:n_children]
258
+ torch.maximum(out, corr._base[sel,sl(-y),sl(-x)], out=out)
259
+
260
+ if not reverse:
261
+ if weights is not None: corr /= weights.clamp(min=1e-12) # cancel weights
262
+ weights = norm_borders(res, nrm, norm=norm)[:-1]
263
+ res = res[:-1] # remove garbage channel
264
+ res = res.view(H1+ex, W1+ex, *res.shape[-2:])
265
+ return res if reverse else (res, weights)
266
+
267
+ def norm_borders( res, nrm, norm=0.9 ):
268
+ """ apply some border normalization, modulated by `norm`
269
+ - if norm=0: no normalization at all
270
+ - if norm=1: full normalization
271
+ Formula: nrm = k * (nrm/k)**p = k**(1-p) * nrm**p,
272
+ with k=nrm[:,1,1] and p=norm
273
+ """
274
+ new_weights = nrm[...,1,1].clone()
275
+ nrm = (nrm[...,1:2,1:2] ** (1-norm)) * (nrm ** norm)
276
+ # assert not torch.isnan(nrm).any()
277
+
278
+ # normalize results on the borders
279
+ res[...,0 ,0 ] /= nrm[...,0 ,0 ]
280
+ res[...,0 ,1:-1] /= nrm[...,0 ,1:2]
281
+ res[...,0 , -1] /= nrm[...,0 ,2 ]
282
+ res[...,1:-1,0 ] /= nrm[...,1:2,0 ]
283
+ res[...,1:-1,1:-1] /= nrm[...,1:2,1:2]
284
+ res[...,1:-1, -1] /= nrm[...,1:2,2 ]
285
+ res[..., -1,0 ] /= nrm[...,2 ,0 ]
286
+ res[..., -1,1:-1] /= nrm[...,2 ,1:2]
287
+ res[..., -1, -1] /= nrm[...,2 ,2 ]
288
+ return new_weights
289
+
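+ # Worked sketch: with norm=0.9 and a center count k=4, a corner cell reached by a single child
+ # is divided by 4**0.1 * 1**0.9 ~= 1.15 (norm=1 would divide it by 1, norm=0 by k=4).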
290
+
291
+ def inverse_mapping( map, max_elem=None, default=None):
292
+ """ given a mapping {i:j} we output {j:i}
293
+ (the mapping is a torch array)
294
+ """
295
+ assert isinstance(map, torch.Tensor) and map.ndim == 1
296
+ if max_elem is None: max_elem = map.max()
297
+ if default is None:
298
+ index = torch.empty(max_elem+1, dtype=torch.int64, device=map.device) # same size as corr, last elem == garbage
299
+ else:
300
+ index = torch.full((max_elem+1,), default, dtype=torch.int64, device=map.device) # same size as corr, last elem == garbage
301
+ index[map] = torch.arange(len(map), device=map.device)
302
+ return index
303
+
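+ # Example sketch: inverse_mapping(torch.tensor([3, 0, 2]), max_elem=4, default=-1)
+ # -> tensor([1, -1, 2, 0, -1]): position j holds the i such that map[i] == j.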
304
+
305
+ def good_slice( nonzero ):
306
+ good = nonzero.nonzero().ravel()
307
+ return slice(good.min().item(), good.max().item()+1)
308
+
309
+
310
+ def max_unpool(upper, lower, exclude_border=True):
311
+ # re-compute max-pool indices
312
+ if exclude_border:
313
+ # apparently, we cannot unpool on the bottom and right borders in legacy code (local_argmax with ex=1)
314
+ _, pos = F.max_pool2d(lower[:,:,:-1,:-1], 3, padding=1, stride=2, return_indices=True, ceil_mode=True)
315
+ W1 = lower.shape[-1]
316
+ pos = (pos//(W1-1))*W1 + (pos%(W1-1)) # fix the shortening
317
+ else:
318
+ _, pos = F.max_pool2d(lower, 3, padding=1, stride=2, return_indices=True)
319
+
320
+ # because there are potential collisions between overlapping 3x3 cells,
321
+ # that pytorch does not handle, we unpool in 4 successive non-overlapping steps.
322
+ for i in range(2):
323
+ for j in range(2):
324
+ # stride=0 instead of 1 because pytorch does some size checking, this is a hack
325
+ tmp = F.max_unpool2d(upper[:,:,i::2,j::2], pos[:,:,i::2,j::2], kernel_size=3, padding=0, stride=4, output_size=lower.shape[-2:])
326
+ if i == j == 0:
327
+ res = tmp
328
+ else:
329
+ torch.maximum(res, tmp, out=res)
330
+
331
+ # add scores to existing lower correlation map
332
+ lower += res
333
+ return lower
334
+
335
+
336
+ def mgrid( shape, **kw ):
337
+ """ Returns in (x, y) order (contrary to numpy which is (y,x) """
338
+ if isinstance(shape, torch.Tensor): shape = shape.shape
339
+ res = torch.meshgrid(*[torch.arange(n, dtype=torch.float32, **kw) for n in shape], indexing='ij')
340
+ return torch.stack(res[::-1], dim=-1).view(-1,2)
341
+
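+ # Example sketch: mgrid((2, 3)) -> [[0., 0.], [1., 0.], [2., 0.], [0., 1.], [1., 1.], [2., 1.]]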
342
+
343
+ def check_corres( corres, step, rot=None ):
344
+ H, W, two = corres.shape
345
+ assert two == 2
346
+ if isinstance(corres, np.ndarray):
347
+ corres = torch.from_numpy(corres)
348
+ if rot is not None:
349
+ corres = affmul(rot, corres)
350
+ gt = mgrid(corres.shape[:2]).view(H,W,2)
351
+ assert ((gt - corres // step).abs() <= 2).float().mean() > 0.99, bb()
352
+
353
+
354
+ def best_correspondences(corr):
355
+ """ All positions are returned as x1, y1, x2, y2
356
+ """
357
+ if isinstance(corr, tuple): return corr # for legacy
358
+ H1, W1, H2, W2 = corr.shape
359
+ fix1 = lambda arr: 4*arr+2 # center of cells in img1
360
+ div = lambda a,b: torch.div(a, b, rounding_mode='trunc') # because of warning in pytorch 1.9+
361
+
362
+ # best scores in img1
363
+ score1, pos1 = corr.view(H1, W1, H2*W2).max(dim=-1)
364
+ pos1 = torch.cat((fix1(mgrid(score1, device=pos1.device)), pos1.view(-1,1)%W2, div(pos1.view(-1,1),W2)), dim=-1)
365
+
366
+ # best scores in img2
367
+ score2, pos2 = max_pool3d( corr, kernel_size=4, stride=4 )
368
+ pos2, score2 = pos2.view(-1,1), score2.squeeze()
369
+ pos2 = torch.cat((fix1(div(pos2,W2*H2)%W1), fix1(div(pos2,(W1*H2*W2))), pos2%W2, div(pos2,W2)%H2), dim=-1).float()
370
+
371
+ return (pos1, score1), (pos2, score2)
372
+
373
+
374
+ def intersection( set1_, set2_ ):
375
+ """ Returns the indices of values in set1 that are duplicated in set2
376
+ """
377
+ set1, map1 = set1_.squeeze().unique(return_inverse=True) # map1: i1 -> j1
378
+ set2 = set2_.squeeze().unique()
379
+ combined = torch.cat((set1, set2))
380
+
381
+ uniques, inverse, counts = combined.unique(return_counts=True, return_inverse=True)
382
+ # j -> u, i -> j, j -> n
383
+ # we are interested only in (j -> i) for n > 1:
384
+ # assert counts.max() <= 2, 'there were non-unique values in either set1 or set2'+bb()
385
+ # intersected_values = uniques[counts > 1]
386
+ inverse1 = inverse_mapping(inverse[:len(set1)], max_elem=len(uniques)-1)
387
+ intersected_indices1 = inverse1[counts>1]
388
+ return inverse_mapping(map1, max_elem=len(set1)-1)[intersected_indices1]
389
+
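+ # Example sketch: intersection(torch.tensor([3, 5, 7]), torch.tensor([5, 9])) -> tensor([1]),
+ # since set1[1] == 5 is the only value also present in set2.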
390
+
391
+ def reciprocal(self, corres1, corres2 ):
392
+ pos1, score1 = corres1
393
+ pos2, score2 = corres2
394
+ (H1, W1), (H2, W2) = score1.shape, map(lambda i: 4*i+1, score2.shape)
395
+
396
+ to_int = pos1.new_tensor((W1*H2*W2, H2*W2, W2, 1), dtype=torch.float32)
397
+ inter1 = intersection(pos1@to_int, pos2@to_int)
398
+ res = torch.cat((pos1[inter1], score1.view(-1,1)[inter1], 0*score1.view(-1,1)[inter1]), dim=-1)
399
+ return res
400
+
401
+
402
+ def max_pool3d( corr, kernel_size=4, stride=4 ):
403
+ H1, W1, H2, W2 = corr.shape
404
+ ks, st = kernel_size, stride
405
+ if corr.numel() >= 2**31 and corr.device != torch.device('cpu'):
406
+ # re-implementation due to a bug in pytorch
407
+ import core.cuda_deepm as kernels
408
+ return kernels.max_pool3d( corr.view(1, H1*W1, H2, W2), kernel_size, stride)
409
+ else:
410
+ return F.max_pool3d( corr.view(1, 1, H1*W1, H2, W2), kernel_size=(H1*W1,ks,ks), stride=(1,st,st), return_indices=True)
411
+
412
+
413
+ def forward_cuda(self, level, lower, weights=None, pooled=False):
414
+ import core.cuda_deepm as kernels # must be imported after torch_set_gpu()
415
+ assert lower.numel() < 2**31, 'please use cuda-lowmem, pytorch cannot handle big tensors'
416
+ pooled = lower if pooled else F.max_pool2d(lower, 3, padding=1, stride=2)
417
+ return kernels.forward_agg(level, self.border_inv, pooled, weights)
418
+
419
+ def forward_cuda_lowmem(self, level, lower, weights=None):
420
+ import core.cuda_deepm as kernels # must be imported after torch_set_gpu()
421
+ return kernels.forward_pool_agg(level, self.border_inv, lower, weights)
422
+
423
+ def backward_cuda(self, level, pyramid):
424
+ import core.cuda_deepm as kernels # must be imported after torch_set_gpu()
425
+ kernels.backward_agg_unpool(level, pyramid[level], pyramid[level-1], True)
426
+ # assert not torch.isnan(pyramid[level-1]).any(), bb()
427
+ return pyramid[level-1]
428
+
429
+ def merge_corres(self, corres, rots, all_corres, code):
430
+ " rot : reference --> rotated "
431
+ all_step = self.matcher.pixel_desc.get_atomic_patch_size() // 2 # step size in all_corres
432
+ dev = all_corres[0][1].device
433
+
434
+ # stack correspondences
435
+ corres = [torch.cat((p.view(*s.shape,4),s[:,:,None],torch.full_like(s[:,:,None],code)),dim=2) for (p,s) in corres]
436
+
437
+ import core.cuda_deepm as kernels # must be imported after torch_set_gpu()
438
+ kernels.merge_corres_one_side( corres[0].to(dev), 0, rots[0].to(dev), all_corres[0][1], all_step )
439
+ kernels.merge_corres_one_side( corres[1].to(dev), 2, rots[1].to(dev), all_corres[1][1], all_step )
440
+
core/losses/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from .multiloss import MultiLoss
6
+ from .pixel_ap_loss import PixelAPLoss
7
+ from .ap_loss_sampler import NghSampler
8
+ from .unsupervised_deepmatching_loss import DeepMatchingLoss
core/losses/ap_loss.py ADDED
@@ -0,0 +1,61 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import torch
7
+ import torch.nn as nn
8
+
9
+
10
+ class APLoss (nn.Module):
11
+ """ differentiable AP loss, through quantization.
12
+
13
+ Input: (N, M) values in [min, max]
14
+ label: (N, M) values in {0, 1}
15
+
16
+ Returns: list of query AP (for each n in {1..N})
17
+ Note: typically, you want to minimize 1 - mean(AP)
18
+ """
19
+ def __init__(self, nq=25, min=0, max=1, euc=False):
20
+ nn.Module.__init__(self)
21
+ assert isinstance(nq, int) and 2 <= nq <= 100
22
+ self.nq = nq
23
+ self.min = min
24
+ self.max = max
25
+ self.euc = euc
26
+ gap = max - min
27
+ assert gap > 0
28
+
29
+ # init quantizer = non-learnable (fixed) convolution
30
+ self.quantizer = q = nn.Conv1d(1, 2*nq, kernel_size=1, bias=True).requires_grad_(False)
31
+ a = (nq-1) / gap
32
+ #1st half = lines passing through (min+x,1) and (min+x+1/a,0) with x = {nq-1..0}*gap/(nq-1)
33
+ q.weight.data[:nq] = -a
34
+ q.bias.data[:nq] = a*min + torch.arange(nq, 0, -1) # b = 1 + a*(min+x)
35
+ #2nd half = lines passing through (min+x,1) and (min+x-1/a,0) with x = {nq-1..0}*gap/(nq-1)
36
+ q.weight.data[nq:] = a
37
+ q.bias.data[nq:] = torch.arange(2-nq, 2, 1) - a*min # b = 1 - a*(min+x)
38
+ # first and last one are special: just horizontal straight line
39
+ q.weight.data[0] = q.weight.data[-1] = 0
40
+ q.bias.data[0] = q.bias.data[-1] = 1
41
+
42
+ def compute_AP(self, x, label):
43
+ N, M = x.shape
44
+ if self.euc: # euclidean distance in the same range as similarities
45
+ x = 1 - torch.sqrt(2.001 - 2*x)
46
+
47
+ # quantize all predictions
48
+ q = self.quantizer(x.unsqueeze(1))
49
+ q = torch.min(q[:,:self.nq], q[:,self.nq:]).clamp(min=0) # N x Q x M
50
+
51
+ nbs = q.sum(dim=-1) # number of samples N x Q = c
52
+ rec = (q * label.view(N,1,M).float()).sum(dim=-1) # nb of correct samples = c+ N x Q
53
+ prec = rec.cumsum(dim=-1) / (1e-16 + nbs.cumsum(dim=-1)) # precision
54
+ rec /= rec.sum(dim=-1).unsqueeze(1) # norm in [0,1]
55
+
56
+ ap = (prec * rec).sum(dim=-1) # per-image AP
57
+ return ap
58
+
59
+ def forward(self, x, label):
60
+ assert x.shape == label.shape # N x M
61
+ return self.compute_AP(x, label)
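+ # Usage sketch (illustrative values):
+ #   aploss = APLoss(nq=25)
+ #   scores = torch.tensor([[0.9, 0.8, 0.3, 0.1]])   # N=1 query, M=4 items
+ #   labels = torch.tensor([[1., 0., 1., 0.]])
+ #   ap = aploss(scores, labels)                     # shape (1,), one AP per query
+ #   loss = 1 - ap.mean()                            # what is typically minimized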
core/losses/ap_loss_sampler.py ADDED
@@ -0,0 +1,131 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+
12
+
13
+ class NghSampler (nn.Module):
14
+ """ Given dense feature maps and pixel-dense flow,
15
+ compute a subset of all correspondences and return their scores and labels.
16
+
17
+ Distance to GT => 0 ... pos_d ... neg_d ... ngh
18
+ Pixel label => + + + + + + 0 0 - - - - - - -
19
+
20
+ Subsample on query side: if > 0, regular grid
21
+ < 0, random points
22
+ In both cases, the number of query points is = W*H/subq**2
23
+ """
24
+ def __init__(self, ngh, subq=-8, subd=1, pos_d=2, neg_d=4, border=16, subd_neg=-8):
25
+ nn.Module.__init__(self)
26
+ assert 0 <= pos_d < neg_d <= (ngh if ngh else 99)
27
+ self.ngh = ngh
28
+ self.pos_d = pos_d
29
+ self.neg_d = neg_d
30
+ assert subd <= ngh or ngh == 0
31
+ assert subq != 0
32
+ self.sub_q = subq
33
+ self.sub_d = subd
34
+ self.sub_d_neg = subd_neg
35
+ if border is None: border = ngh
36
+ assert border >= ngh, 'border has to be larger than ngh'
37
+ self.border = border
38
+ self.precompute_offsets()
39
+
40
+ def precompute_offsets(self):
41
+ pos_d2 = self.pos_d**2
42
+ neg_d2 = self.neg_d**2
43
+ rad2 = self.ngh**2
44
+ rad = (self.ngh//self.sub_d) * self.sub_d # round ngh down to an integer multiple of sub_d
45
+ pos = []
46
+ neg = []
47
+ for j in range(-rad, rad+1, self.sub_d):
48
+ for i in range(-rad, rad+1, self.sub_d):
49
+ d2 = i*i + j*j
50
+ if d2 <= pos_d2:
51
+ pos.append( (i,j) )
52
+ elif neg_d2 <= d2 <= rad2:
53
+ neg.append( (i,j) )
54
+
55
+ self.register_buffer('pos_offsets', torch.LongTensor(pos).view(-1,2).t())
56
+ self.register_buffer('neg_offsets', torch.LongTensor(neg).view(-1,2).t())
57
+
58
+ def gen_grid(self, step, aflow):
59
+ B, two, H, W = aflow.shape
60
+ dev = aflow.device
61
+ b1 = torch.arange(B, device=dev)
62
+ if step > 0:
63
+ # regular grid
64
+ x1 = torch.arange(self.border, W-self.border, step, device=dev)
65
+ y1 = torch.arange(self.border, H-self.border, step, device=dev)
66
+ H1, W1 = len(y1), len(x1)
67
+ shape = (B, H1, W1)
68
+ x1 = x1[None,None,:].expand(B,H1,W1).reshape(-1)
69
+ y1 = y1[None,:,None].expand(B,H1,W1).reshape(-1)
70
+ b1 = b1[:,None,None].expand(B,H1,W1).reshape(-1)
71
+ else:
72
+ # randomly spread
73
+ n = (H - 2*self.border) * (W - 2*self.border) // step**2
74
+ x1 = torch.randint(self.border, W-self.border, (n,), device=dev)
75
+ y1 = torch.randint(self.border, H-self.border, (n,), device=dev)
76
+ x1 = x1[None,:].expand(B,n).reshape(-1)
77
+ y1 = y1[None,:].expand(B,n).reshape(-1)
78
+ b1 = b1[:,None].expand(B,n).reshape(-1)
79
+ shape = (B, n)
80
+ return b1, y1, x1, shape
81
+
82
+ def forward(self, feats, confs, aflow, **kw):
83
+ B, two, H, W = aflow.shape
84
+ assert two == 2, bb()
85
+ feat1, conf1 = feats[0], (confs[0] if confs else None)
86
+ feat2, conf2 = feats[1], (confs[1] if confs else None)
87
+
88
+ # positions in the first image
89
+ b_, y1, x1, shape = self.gen_grid(self.sub_q, aflow)
90
+
91
+ # sample features from first image
92
+ feat1 = feat1[b_, :, y1, x1]
93
+ qconf = conf1[b_, :, y1, x1].view(shape) if confs else None
94
+
95
+ #sample GT from second image
96
+ xy2 = (aflow[b_, :, y1, x1] + 0.5).long().t()
97
+ mask = (0 <= xy2[0]) * (0 <= xy2[1]) * (xy2[0] < W) * (xy2[1] < H)
98
+ mask = mask.view(shape)
99
+
100
+ def clamp(xy):
101
+ torch.clamp(xy[0], 0, W-1, out=xy[0])
102
+ torch.clamp(xy[1], 0, H-1, out=xy[1])
103
+ return xy
104
+
105
+ # compute positive scores
106
+ xy2p = clamp(xy2[:,None,:] + self.pos_offsets[:,:,None])
107
+ pscores = torch.einsum('nk,ink->ni', feat1, feat2[b_, :, xy2p[1], xy2p[0]])
108
+
109
+ # compute negative scores
110
+ xy2n = clamp(xy2[:,None,:] + self.neg_offsets[:,:,None])
111
+ nscores = torch.einsum('nk,ink->ni', feat1, feat2[b_, :, xy2n[1], xy2n[0]])
112
+
113
+ if self.sub_d_neg:
114
+ # add distractors from a grid
115
+ b3, y3, x3 = self.gen_grid(self.sub_d_neg, aflow)[:3]
116
+ distractors = feat2[b3, :, y3, x3]
117
+ dscores = torch.einsum('nk,ik->ni', feat1, distractors)
118
+ del distractors
119
+
120
+ # remove scores that correspond to positives or nulls
121
+ x2, y2 = xy2 = xy2.float()
122
+ xy3 = torch.stack((x3,y3)).float()
123
+ dis2 = torch.cdist((xy2+b_*512).T, (xy3+b3*512).T, compute_mode='donot_use_mm_for_euclid_dist')
124
+ dscores[dis2 < self.neg_d] = 0
125
+
126
+ scores = torch.cat((pscores, nscores, dscores), dim=1)
127
+
128
+ gt = scores.new_zeros(scores.shape, dtype=torch.uint8)
129
+ gt[:, :pscores.shape[1]] = 1
130
+
131
+ return scores, gt, mask, qconf
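+ # Usage sketch (illustrative shapes; descriptors are L2-normalized in practice):
+ #   sampler = NghSampler(ngh=16, subq=-8, subd=2, pos_d=2, neg_d=4, border=16)
+ #   feats = (torch.randn(2, 128, 64, 64), torch.randn(2, 128, 64, 64))
+ #   aflow = torch.rand(2, 2, 64, 64) * 63                 # dense GT flow from img1 to img2
+ #   scores, gt, mask, qconf = sampler(feats, None, aflow)  # qconf is None without confidences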
core/losses/multiloss.py ADDED
@@ -0,0 +1,57 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+
11
+ from tools.trainer import backward
12
+
13
+
14
+ class MultiLoss (nn.Module):
15
+ """ This functions handles both supervised and unsupervised samples.
16
+ """
17
+ def __init__(self, loss_sup, loss_unsup, alpha=0.3, inner_bw=True):
18
+ super().__init__()
19
+ assert 0 <= alpha
20
+ self.alpha_sup = 1 # coef of self-supervised loss
21
+ self.loss_sup = loss_sup
22
+
23
+ self.alpha_unsup = alpha # coef of unsupervised loss
24
+ self.loss_unsup = loss_unsup
25
+
26
+ self.inner_bw = inner_bw
27
+
28
+ def forward(self, desc1, desc2, homography, **kw):
29
+ sl_sup, sl_unsup = split_batch_sup_unsup(homography, 512 if self.inner_bw else 8)
30
+
31
+ inner_bw = self.inner_bw and self.training and torch.is_grad_enabled()
32
+ if inner_bw: (desc1, desc1_), (desc2, desc2_) = pause_gradient((desc1,desc2))
33
+ kw['desc1'], kw['desc2'], kw['homography'] = desc1, desc2, homography
34
+
35
+ (sup_name, sup_loss) ,= self.loss_sup(backward_loss=inner_bw*self.alpha_sup, **{k:v[sl_sup] for k,v in kw.items()}).items()
36
+ if inner_bw and sup_loss: sup_loss = backward(sup_loss) # backward to desc1 and desc2
37
+
38
+ (uns_name, uns_loss) ,= self.loss_unsup(**{k:v[sl_unsup] for k,v in kw.items()}).items()
39
+ uns_loss = self.alpha_unsup * uns_loss
40
+ if inner_bw and uns_loss: uns_loss = backward(uns_loss) # backward to desc1 and desc2
41
+
42
+ loss = sup_loss + uns_loss
43
+ return {'loss':(loss, [(desc1_,desc1.grad),(desc2_,desc2.grad)]), sup_name:float(sup_loss), uns_name:float(uns_loss)}
44
+
45
+
46
+ def pause_gradient( objs ):
47
+ return [(obj.detach().requires_grad_(True), obj) for obj in objs]
48
+
49
+
50
+ def split_batch_sup_unsup(homography, max_sup=512):
51
+ # split batch in supervised / unsupervised
52
+ i = int(torch.isfinite(homography[:,0,0]).sum()) # first occurrence
53
+ sl_sup, sl_unsup = slice(0, min(i,max_sup)), slice(i, None)
54
+
55
+ assert torch.isfinite(homography[sl_sup]).all(), 'batch is not properly sorted!'
56
+ assert torch.isnan(homography[sl_unsup]).all(), 'batch is not properly sorted!'
57
+ return sl_sup, sl_unsup
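+ # Example sketch: a batch of 5 where only the first 3 homographies are defined
+ #   H = torch.full((5, 3, 3), float('nan')); H[:3] = torch.eye(3)
+ #   split_batch_sup_unsup(H)   # -> (slice(0, 3), slice(3, None))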
core/losses/pixel_ap_loss.py ADDED
@@ -0,0 +1,82 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+
10
+ from .ap_loss import APLoss
11
+ from datasets.utils import applyh
12
+
13
+
14
+ class PixelAPLoss (nn.Module):
15
+ """ Computes the pixel-wise AP loss:
16
+ Given two images and ground-truth optical flow, computes the AP per pixel.
17
+
18
+ feat1: (B, C, H, W) pixel-wise features extracted from img1
19
+ feat2: (B, C, H, W) pixel-wise features extracted from img2
20
+ aflow: (B, 2, H, W) absolute flow: aflow[...,y1,x1] = x2,y2
21
+ """
22
+ def __init__(self, sampler, nq=20, inner_bw=False, bw_step=256):
23
+ nn.Module.__init__(self)
24
+ self.aploss = APLoss(nq, min=0, max=1, euc=False)
25
+ self.name = 'pixAP'
26
+ self.sampler = sampler
27
+ self.inner_bw = inner_bw
28
+ self.bw_step = bw_step
29
+
30
+ def loss_from_ap(self, ap, rel):
31
+ return 1 - ap
32
+
33
+ def forward(self, desc1, desc2, homography, backward_loss=None, **kw):
34
+ if len(desc1) == 0: return dict(ap_loss=0)
35
+ aflow = aflow_from_H(homography, desc1)
36
+ descriptors = (desc1, desc2)
37
+ scores, gt, msk, qconf = self.sampler(descriptors, kw.get('reliability'), aflow)
38
+
39
+ # compute pixel-wise AP
40
+ n = msk.numel()
41
+ if n == 0: return dict(ap_loss=0)
42
+ scores, gt = scores.view(n,-1), gt.view(n,-1)
43
+
44
+ backward_loss = backward_loss or self.inner_bw
45
+ if self.training and torch.is_grad_enabled() and backward_loss:
46
+ # progressive loss computation and backward, low memory but slow
47
+ scores_, qconf_ = scores, qconf if qconf is not None else scores.new_ones(msk.shape)
48
+ scores = scores.detach().requires_grad_(True)
49
+ qconf = qconf_.detach().requires_grad_(True)
50
+ msk = msk.ravel()
51
+
52
+ loss = 0
53
+ for i in range(0, n, self.bw_step):
54
+ sl = slice(i, i+self.bw_step)
55
+ ap = self.aploss(scores[sl], gt[sl])
56
+ pixel_loss = self.loss_from_ap(ap, qconf.ravel()[sl] if qconf is not None else None)
57
+ l = backward_loss / msk.sum() * pixel_loss[msk[sl]].sum()
58
+ loss += float(l)
59
+ l.backward() # cumulate gradient
60
+ loss = (loss, [(scores_,scores.grad)])
61
+ if qconf_.requires_grad: loss[1].append((qconf_,qconf.grad))
62
+
63
+ else:
64
+ ap = self.aploss(scores, gt).view(msk.shape)
65
+ pixel_loss = self.loss_from_ap(ap, qconf)
66
+ loss = pixel_loss[msk].mean()
67
+
68
+ return dict(ap_loss=loss)
69
+
70
+
71
+ def make_grid(B, H, W, device ):
72
+ b = torch.arange(B, device=device).view(B,1,1).expand(B,H,W)
73
+ y = torch.arange(H, device=device).view(1,H,1).expand(B,H,W)
74
+ x = torch.arange(W, device=device).view(1,1,W).expand(B,H,W)
75
+ return b.view(B,H*W), torch.stack((x,y),dim=-1).view(B,H*W,2)
76
+
77
+
78
+ def aflow_from_H( H_1to2, feat1 ):
79
+ B, _, H, W = feat1.shape
80
+ b, pos1 = make_grid(B,H,W, feat1.device)
81
+ pos2 = applyh(H_1to2, pos1.float())
82
+ return pos2.view(B,H,W,2).permute(0,3,1,2)
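+ # Sanity sketch (assuming applyh applies the homography to (x, y) points): with
+ # H_1to2 = torch.eye(3)[None] and feat1 of shape (1, C, H, W), the returned flow satisfies
+ # aflow[0, :, y, x] == (x, y), i.e. every pixel is mapped onto itself.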
core/losses/unsupervised_deepmatching_loss.py ADDED
@@ -0,0 +1,146 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+
11
+ from core import functional as myF
12
+
13
+
14
+ class DeepMatchingLoss (nn.Module):
15
+ """ This loss is based on DeepMatching (IJCV'16).
16
+ atleast: (int) minimum image size at which the pyramid construction stops.
17
+ sub: (int) prior subsampling
18
+ way: (str) which way to compute the asymmetric matching ('1', '2' or '12')
19
+ border: (int) ignore pixels too close to the border
20
+ rectify_p: (float) non-linear power-rectification in DeepMatching
21
+ eps: (float) epsilon for the L1 normalization. Kinda handles unmatched pixels.
22
+ """
23
+ def __init__(self, eps=0.03, atleast=5, sub=2, way='12', border=16, rectify_p=1.5):
24
+ super().__init__()
25
+ assert way in ('1','2','12')
26
+ self.subsample = sub
27
+ self.border = border
28
+ self.way = way
29
+ self.atleast = atleast
30
+ self.rectify_p = rectify_p
31
+ self.eps = eps
32
+
33
+ self._cache = {}
34
+
35
+ def rectify(self, corr):
36
+ corr = corr.clip_(min=0)
37
+ corr = corr ** self.rectify_p
38
+ return corr
39
+
40
+ def forward(self, desc1, desc2, **kw):
41
+ # 1 --> 2
42
+ loss1 = self.forward_oneway(desc1, desc2, **kw) \
43
+ if '1' in self.way else 0
44
+
45
+ # 2 --> 1
46
+ loss2 = self.forward_oneway(desc2, desc1, **kw) \
47
+ if '2' in self.way else 0
48
+
49
+ return dict(deepm_loss=(loss1+loss2)/len(self.way))
50
+
51
+ def forward_oneway(self, desc1, desc2, dbg=(), **kw):
52
+ assert desc1.shape[:2] == desc2.shape[:2]
53
+
54
+ # prior subsampling
55
+ s = slice(self.border, -self.border or None, self.subsample)
56
+ desc1, desc2 = desc1[...,s,s], desc2[...,s,s]
57
+ desc1 = desc1[:,:,2::4,2::4] # subsample patches in 1st image
58
+ B, D, H1, W1, H2, W2 = desc1.shape + desc2.shape[-2:]
59
+ if B == 0: return 0 # empty batch
60
+
61
+ # initial 4D correlation volume
62
+ corr = torch.bmm(desc1.reshape(B,D,-1).transpose(1,2), desc2.reshape(B,D,-1)).view(B,H1,W1,H2,W2)
63
+
64
+ # build pyramid
65
+ pyramid = self.deep_matching(corr)
66
+ corr = pyramid[-1] # high-level correlation
67
+ corr = self.rectify(corr)
68
+
69
+ # L1 norm
70
+ B, H1, W1, H2, W2 = corr.shape
71
+ corr = corr / (corr.reshape(B,H1*W1,-1).sum(dim=-1).view(B,H1,W1,1,1) + self.eps)
72
+
73
+ # squared L2 norm
74
+ loss = - torch.square(corr).sum() / (B*H1*W1)
75
+ return loss
76
+
77
+ def deep_matching(self, corr):
78
+ # print(f'level=0 {corr.shape=}')
79
+ weights = None
80
+ pyramid = [corr]
81
+ for level in range(1,999):
82
+ corr, weights = self.forward_level(level, corr, weights)
83
+ pyramid.append(corr)
84
+ # print(f'{level=} {corr.shape=}')
85
+ if weights.sum() == 0: break # img1 has become too small
86
+ if min(corr.shape[-2:]) < 2*self.atleast: break # img2 has become too small
87
+ return pyramid
88
+
89
+ def forward_level(self, level, corr, weights):
90
+ B, H1, W1, H2, W2 = corr.shape
91
+
92
+ # max-pooling
93
+ pooled = F.max_pool2d(corr.view(B,H1*W1,H2,W2), 3, padding=1, stride=2)
94
+ pooled = pooled.view(B, H1, W1, *pooled.shape[-2:])
95
+
96
+ # print(f'rectifying corr at {level=}')
97
+ pooled = self.rectify(pooled)
98
+
99
+ # sparse conv
100
+ key = level, H1, W1, H2, W2
101
+ if key not in self._cache:
102
+ B, H1, W1, H2, W2 = myF.true_corr_shape(pooled.shape, level-1)
103
+ self._cache[key] = myF.children(level, H1, W1, H2, W2).to(corr.device)
104
+
105
+ return sparse_conv(level, pooled, self._cache[key], weights)
106
+
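+ # Usage sketch (illustrative shapes; descriptors are typically L2-normalized):
+ #   loss_fn = DeepMatchingLoss(sub=2, border=16, way='12')
+ #   desc1 = F.normalize(torch.randn(2, 64, 128, 128), dim=1)
+ #   desc2 = F.normalize(torch.randn(2, 64, 128, 128), dim=1)
+ #   loss_fn(desc1, desc2)    # -> {'deepm_loss': tensor(...)}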
107
+
108
+ def sparse_conv(level, corr, parents, weights=None, border_norm=0.9):
109
+ B, H1, W1, H2, W2 = myF.true_corr_shape(corr.shape, level-1)
110
+ n_cache = len(parents)
111
+
112
+ # perform the sparse convolution 'manually'
113
+ # since sparse convolutions are not implemented in pytorch currently
114
+ corr = corr.view(B, -1, H2, W2)
115
+
116
+ res = corr.new_zeros((B, n_cache+1, H2, W2)) # last one = garbage channel
117
+ nrm = corr.new_full((n_cache+1, 3, 3), torch.finfo(corr.dtype).eps)
118
+ ones = nrm.new_ones((corr.shape[1], 1, 1))
119
+ ex = 1
120
+ if weights is not None:
121
+ weights = weights.view(corr.shape[1],1,1)
122
+ corr = corr * weights[None] # apply weights to correlation maps beforehand
123
+ ones *= weights
124
+
125
+ sl = lambda v: slice(0,-1 or None) if v < 0 else slice(1,None)
126
+ c = 0
127
+ for y in (-1, 1):
128
+ for x in (-1, 1):
129
+ src_layers = parents[:,c]; c+= 1
130
+ # we want to do: res += corr[src_layers] (for all children != -1)
131
+ # but we only have 'res.index_add_()' <==> res[tgt_layers] += corr
132
+ tgt_layers = myF.inverse_mapping(src_layers, max_elem=corr.shape[1], default=n_cache)[:-1]
133
+
134
+ # All of corr's channels MUST be utilized. for level>1, this doesn't hold,
135
+ # so we'll send them to a garbage channel ==> res[n_cache]
136
+ sel = myF.good_slice( tgt_layers < n_cache )
137
+
138
+ res[:,:,sl(-y),sl(-x)].index_add_(1, tgt_layers[sel], corr[:,sel,sl(y),sl(x)])
139
+ nrm[ :,sl(-y),sl(-x)].index_add_(0, tgt_layers[sel], ones[sel].expand(-1,2,2))
140
+
141
+ # normalize borders
142
+ weights = myF.norm_borders(res, nrm, norm=border_norm)[:-1]
143
+
144
+ res = res[:,:-1] # remove garbage channel
145
+ return res.view(B, H1+ex, W1+ex, *res.shape[-2:]), weights
146
+
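
A minimal, self-contained sketch (illustrative only, not part of the commit) of what the forward pass above computes: two L2-normalized descriptor maps are correlated into a dense 4D volume, each source location's correlation map is rectified and L1-normalized, and the loss rewards peaked, near one-hot maps through a negative squared-L2 term. The `clamp(min=0)` below is only a stand-in for the model's `rectify` step, and the shapes are arbitrary.

    import torch
    import torch.nn.functional as F

    B, D, H1, W1, H2, W2 = 2, 128, 8, 8, 16, 16
    desc1 = F.normalize(torch.randn(B, D, H1, W1), dim=1)
    desc2 = F.normalize(torch.randn(B, D, H2, W2), dim=1)

    # dense 4D correlation volume between every pixel of img1 and img2
    corr = torch.bmm(desc1.reshape(B, D, -1).transpose(1, 2),
                     desc2.reshape(B, D, -1)).view(B, H1, W1, H2, W2)
    corr = corr.clamp(min=0)  # stand-in for self.rectify(corr)

    # L1-normalize each (H2, W2) correlation map, then apply the negative squared-L2 loss:
    # it is minimized when every source pixel correlates with a single target pixel
    eps = 1e-8
    corr = corr / (corr.reshape(B, H1 * W1, -1).sum(dim=-1).view(B, H1, W1, 1, 1) + eps)
    loss = -torch.square(corr).sum() / (B * H1 * W1)
    print(float(loss))
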
core/pixel_desc.py ADDED
@@ -0,0 +1,60 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ import torchvision.transforms as tvf
11
+
12
+ from core.conv_mixer import ConvMixer
13
+
14
+ norm_RGB = tvf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
15
+
16
+
17
+ class PixelDesc (nn.Module):
18
+ def __init__(self, path='models/PUMP_st.pt'):
19
+ super().__init__()
20
+ state_dict = torch.load( path, 'cpu' )
21
+ self.pixel_desc = ConvMixer(output_dim=128, hidden_dim=512, depth=7, patch_size=4, kernel_size=9).eval()
22
+ self.pixel_desc.load_state_dict(state_dict)
23
+
24
+ def configure(self, pipeline):
25
+ # hot-update of the default HOG-based pipeline
26
+ pipeline.__class__ = type(type(pipeline).__name__+'_Trained', (DescPipeline, type(pipeline)), {})
27
+ return self
28
+
29
+ def get_atomic_patch_size(self):
30
+ return 4
31
+
32
+ def forward(self, img, stride=1, offset=0):
33
+ if img.ndim == 3: img = img[None]
34
+ trf = torch.eye(3, device=img.device)
35
+
36
+ desc = self.pixel_desc( img )
37
+ desc = desc[..., offset::stride, offset::stride].contiguous() # free memory
38
+ return desc, trf
39
+
40
+
41
+ class DescPipeline:
42
+ def extract_descs(self, img1, img2, dtype=None):
43
+ # this will rotate the image if needed
44
+ img1, sca1 = self.demultiplex_img_trf(img1)
45
+ img2, sca2 = self.demultiplex_img_trf(img2)
46
+
47
+ # convert to float and normalize std
48
+ fimg1, fimg2 = [norm_RGB(img.type(dtype)/255) for img in (img1, img2)]
49
+
50
+ self.pixel_desc.type(fimg1.dtype)
51
+ desc1, trf1 = self.pixel_desc(fimg1, stride=4, offset=2)
52
+ desc2, trf2 = self.pixel_desc(fimg2)
53
+ return (img1, img2), (desc1.type(dtype), desc2.type(dtype)), (sca1@trf1, sca2@trf2)
54
+
55
+ def first_level(self, desc1, desc2, **kw):
56
+ B, C, H, W = desc1.shape
57
+ weights = desc1.permute(0, 2, 3, 1).reshape(H*W, C, 1, 1) # rearrange(desc1, '1 C H W -> (H W) C 1 1')
58
+ corr = F.conv2d(desc2, weights, padding=0, bias=None)[0]
59
+ norms = torch.ones(desc1.shape[-2:], device=corr.device)
60
+ return corr.view(desc1.shape[-2:]+desc2.shape[-2:]), norms
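
The `first_level` trick above (treating every pixel of `desc1` as a 1x1 convolution kernel applied to `desc2`) can be reproduced in isolation. The sketch below is illustrative and uses random stand-in tensors instead of the pretrained ConvMixer descriptors.

    import torch
    import torch.nn.functional as F

    C, H, W = 128, 32, 48
    desc1 = F.normalize(torch.randn(1, C, H, W), dim=1)  # stand-ins for PixelDesc outputs
    desc2 = F.normalize(torch.randn(1, C, H, W), dim=1)

    # each pixel of desc1 becomes one 1x1 kernel, so a single conv2d call produces
    # the full correlation volume between desc1 and desc2
    weights = desc1.permute(0, 2, 3, 1).reshape(H * W, C, 1, 1)
    corr = F.conv2d(desc2, weights, bias=None, padding=0)[0].view(H, W, H, W)
    # corr[y1, x1, y2, x2] == <desc1[:, :, y1, x1], desc2[:, :, y2, x2]>
    print(corr.shape)
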
datasets/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from .image_set import *
6
+ from .web_images import RandomWebImages
7
+ from .pair_dataset import *
8
+ from .pair_loader import *
9
+ from .sfm120k import *
datasets/demo_warp/mountains_src.jpg ADDED
datasets/demo_warp/mountains_tgt.jpg ADDED
datasets/image_set.py ADDED
@@ -0,0 +1,91 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import os
7
+ from os.path import *
8
+ from PIL import Image
9
+
10
+
11
+ class ImageSet(object):
12
+ """ Base class for an image dataset.
13
+ """
14
+ def __init__(self, root, imgs):
15
+ self.root = root
16
+ self.imgs = imgs
17
+ assert imgs, f'Empty image set in {root}'
18
+
19
+ def init_from_folder(self, *args, **kw):
20
+ imset = ImageSet.from_folder(*args, **kw)
21
+ ImageSet.__init__(self, imset.root, imset.imgs)
22
+
23
+ def __len__(self):
24
+ return len(self.imgs)
25
+
26
+ def get_image_path(self, idx):
27
+ return os.path.join(self.root, self.imgs[idx])
28
+
29
+ def get_image(self, idx):
30
+ fname = self.get_image_path(idx)
31
+ try:
32
+ return Image.open(fname).convert('RGB')
33
+ except Exception as e:
34
+ raise IOError("Could not load image %s (reason: %s)" % (fname, str(e)))
35
+
36
+ __getitem__ = get_image
37
+
38
+ @staticmethod
39
+ def from_folder(root, exts=('.jpg','.jpeg','.png','.ppm'), recursive=False, listing=False, check_imgs=False):
40
+ """
41
+ recursive: bool or func. If a function, it must return True for a directory name in order to recurse into it.
42
+ """
43
+ if listing:
44
+ if listing is True: listing = f"list_imgs{'_recursive' if recursive else ''}.txt"
45
+ flist = join(root, listing)
46
+ try: return ImageSet.from_listing(root,flist)
47
+ except IOError: print(f'>> ImageSet.from_folder(listing=True): entering {root}...')
48
+
49
+ if check_imgs is True: # default verif function
50
+ check_imgs = verify_img
51
+
52
+ for _, dirnames, dirfiles in os.walk(root):
53
+ imgs = sorted([f for f in dirfiles if f.lower().endswith(exts)])
54
+ if check_imgs: imgs = [img for img in imgs if check_imgs(join(root,img))]
55
+
56
+ if recursive:
57
+ for dirname in sorted(dirnames):
58
+ if callable(recursive) and not recursive(join(root,dirname)): continue
59
+ imset = ImageSet.from_folder(join(root,dirname), exts=exts, recursive=recursive, listing=listing, check_imgs=check_imgs)
60
+ imgs += [join(dirname,f) for f in imset.imgs]
61
+ break # recursion is handled internally
62
+
63
+ if listing:
64
+ try: open(flist,'w').write('\n'.join(imgs))
65
+ except IOError: pass # write permission denied
66
+ return ImageSet(root, imgs)
67
+
68
+ @staticmethod
69
+ def from_listing(root, list_path):
70
+ return ImageSet(root, open(list_path).read().splitlines())
71
+
72
+ def circular_pad(self, min_size):
73
+ assert self.imgs, 'cannot pad an empty image set'
74
+ while len(self.imgs) < min_size:
75
+ self.imgs += self.imgs # artificially augment size
76
+ self.imgs = self.imgs[:min_size or None]
77
+ return self
78
+
79
+ def __repr__(self):
80
+ prefix = os.path.commonprefix((self.get_image_path(0),self.get_image_path(len(self)-1)))
81
+ return f'{self.__class__.__name__}({len(self)} images from {prefix}...)'
82
+
83
+
84
+
85
+ def verify_img(path, exts=None):
86
+ if exts and not path.lower().endswith(exts): return False
87
+ try:
88
+ Image.open(path).convert('RGB') # try to open it
89
+ return True
90
+ except:
91
+ return False
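
A minimal usage sketch for `ImageSet` (illustrative; it only assumes the two demo images added under datasets/demo_warp in this commit):

    from datasets.image_set import ImageSet

    imset = ImageSet.from_folder('datasets/demo_warp')   # indexes *.jpg/*.png/... files
    print(imset)                                         # ImageSet(2 images from ...)
    img = imset.get_image(0)                             # PIL.Image, converted to RGB
    print(img.size)
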
datasets/pair_dataset.py ADDED
@@ -0,0 +1,226 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import os, os.path as osp
7
+ from tqdm import tqdm
8
+ from PIL import Image
9
+ import numpy as np
10
+ import torch
11
+
12
+ from .image_set import ImageSet
13
+ from .transforms import instanciate_transforms
14
+ from .utils import DatasetWithRng
15
+ invh = np.linalg.inv
16
+
17
+
18
+ class ImagePairs (DatasetWithRng):
19
+ """ Base class for a dataset that serves image pairs.
20
+ """
21
+ imgs = None # regular image dataset
22
+ pairs = [] # list of (idx1, idx2), ...
23
+
24
+ def __init__(self, image_set, pairs, trf=None, **rng):
25
+ assert image_set and pairs, 'empty images or pairs'
26
+ super().__init__(**rng)
27
+ self.imgs = image_set
28
+ self.pairs = pairs
29
+ self.trf = instanciate_transforms(trf, rng=self.rng)
30
+
31
+ def __len__(self):
32
+ return len(self.pairs)
33
+
34
+ def __getitem__(self, idx):
35
+ transform = self.trf or (lambda x:x)
36
+ pair = tuple(map(transform, self._load_pair(idx)))
37
+ return pair, {}
38
+
39
+ def _load_pair(self, idx):
40
+ i,j = self.pairs[idx]
41
+ img1 = self.imgs.get_image(i)
42
+ return (img1, img1) if i == j else (img1, self.imgs.get_image(j))
43
+
44
+ def __repr__(self):
45
+ return f'{self.__class__.__name__}({len(self)} pairs from {self.imgs})'
46
+
47
+
48
+ class StillImagePairs (ImagePairs):
49
+ """ A dataset of 'still' image pairs used for debugging purposes.
50
+ """
51
+ def __init__(self, image_set, pairs=None, **rng):
52
+ if isinstance(image_set, ImagePairs):
53
+ super().__init__(image_set.imgs, pairs or image_set.pairs, **rng)
54
+ else:
55
+ super().__init__(image_set, pairs or [(i,i) for i in range(len(image_set))], **rng)
56
+
57
+ def __getitem__(self, idx):
58
+ img1, img2 = self._load_pair(idx)
59
+ sx, sy = img2.size / np.float32(img1.size)
60
+ return (img1, img2), dict(homography=np.diag(np.float32([sx, sy, 1])))
61
+
62
+
63
+ class SyntheticImagePairs (StillImagePairs):
64
+ """ A synthetic generator of image pairs.
65
+ Given a normal image dataset, it constructs pairs using random homographies & noise.
66
+
67
+ scale: prior image scaling.
68
+ distort: distortion applied independently to (img1,img2) if sym=True else just img2
69
+ sym: (bool) see above.
70
+ """
71
+ def __init__(self, image_set, scale='', distort='', sym=False, **rng):
72
+ super().__init__(image_set, **rng)
73
+ self.symmetric = sym
74
+ self.scale = instanciate_transforms(scale, rng=self.rng)
75
+ self.distort = instanciate_transforms(distort, rng=self.rng)
76
+
77
+ def __getitem__(self, idx):
78
+ (img1, img2), gt = super().__getitem__(idx)
79
+
80
+ img1 = dict(img=img1, homography=np.eye(3,dtype=np.float32))
81
+ if img1['img'] is img2:
82
+ img1 = self.scale(img1)
83
+ img2 = self.distort(dict(img1))
84
+ if self.symmetric: img1 = self.distort(img1)
85
+ else:
86
+ if self.symmetric: img1 = self.distort(self.scale(img1))
87
+ img2 = self.distort(self.scale(dict(img=img2, **gt)))
88
+
89
+ return (img1['img'], img2['img']), dict(homography=img2['homography'] @ invh(img1['homography']))
90
+
91
+ def __repr__(self):
92
+ format = lambda s: ','.join(l.strip() for l in repr(s).splitlines() if l).replace(',','',1)
93
+ return f"{self.__class__.__name__}({len(self)} images, scale={format(self.scale)}, distort={format(self.distort)})"
94
+
95
+
96
+ class CatImagePairs (DatasetWithRng):
97
+ """ Concatenation of several ImagePairs datasets
98
+ """
99
+ def __init__(self, *pair_datasets, seed=torch.initial_seed()):
100
+ assert all(isinstance(db, ImagePairs) for db in pair_datasets)
101
+ self.pair_datasets = pair_datasets
102
+ DatasetWithRng.__init__(self, seed=seed) # init last
103
+ self._init()
104
+
105
+ def _init(self):
106
+ self._pair_offsets = np.cumsum([0] + [len(db) for db in self.pair_datasets])
107
+ self.npairs = self._pair_offsets[-1]
108
+
109
+ def __len__(self):
110
+ return self.npairs
111
+
112
+ def __repr__(self):
113
+ fmt_str = f"{type(self).__name__}({len(self)} pairs,"
114
+ for i,db in enumerate(self.pair_datasets):
115
+ npairs = self._pair_offsets[i+1] - self._pair_offsets[i]
116
+ fmt_str += f'\n\t{npairs} from '+str(db).replace("\n"," ") + ','
117
+ return fmt_str[:-1] + ')'
118
+
119
+ def __getitem__(self, idx):
120
+ b, i = self._which(idx)
121
+ return self.pair_datasets[b].__getitem__(i)
122
+
123
+ def _which(self, i):
124
+ pos = np.searchsorted(self._pair_offsets, i, side='right')-1
125
+ assert pos < self.npairs, 'Bad pair index %d >= %d' % (i, self.npairs)
126
+ return pos, i - self._pair_offsets[pos]
127
+
128
+ def _call(self, func, i, *args, **kwargs):
129
+ b, j = self._which(i)
130
+ return getattr(self.pair_datasets[b], func)(j, *args, **kwargs)
131
+
132
+ def init_worker(self, tid):
133
+ for db in self.pair_datasets:
134
+ db.init_worker(tid)
135
+
136
+
137
+ class BalancedCatImagePairs (CatImagePairs):
138
+ """ Balanced concatenation of several ImagePairs datasets
139
+ """
140
+ def __init__(self, npairs=0, *pair_datasets, **kw):
141
+ assert isinstance(npairs, int) and npairs >= 0, 'BalancedCatImagePairs(npairs != int)'
142
+ assert len(pair_datasets) > 0, 'no dataset provided'
143
+
144
+ if len(pair_datasets) >= 3 and isinstance(pair_datasets[1], int):
145
+ assert len(pair_datasets) % 2 == 1
146
+ pair_datasets = [npairs] + list(pair_datasets)
147
+ npairs, pair_datasets = pair_datasets[0::2], pair_datasets[1::2]
148
+ assert all(isinstance(n, int) for n in npairs)
149
+ self._pair_offsets = np.cumsum([0]+npairs)
150
+ self.npairs = self._pair_offsets[-1]
151
+ else:
152
+ self.npairs = npairs or max(len(db) for db in pair_datasets)
153
+ self._pair_offsets = np.linspace(0, self.npairs, len(pair_datasets)+1).astype(int)
154
+ CatImagePairs.__init__(self, *pair_datasets, **kw)
155
+
156
+ def set_epoch(self, epoch):
157
+ DatasetWithRng.init_worker(self, epoch) # random seed only depends on the epoch
158
+ self._init() # reset permutations for this epoch
159
+
160
+ def init_worker(self, tid):
161
+ CatImagePairs.init_worker(self, tid)
162
+
163
+ def _init(self):
164
+ self._perms = []
165
+ for i,db in enumerate(self.pair_datasets):
166
+ assert len(db), 'cannot balance if there is an empty dataset'
167
+ avail = self._pair_offsets[i+1] - self._pair_offsets[i]
168
+ idxs = np.arange(len(db))
169
+ while len(idxs) < avail:
170
+ idxs = np.r_[idxs,idxs]
171
+ if self.seed: # if not seed, then no shuffle
172
+ self.rng.shuffle(idxs[(avail//len(db))*len(db):])
173
+ self._perms.append( idxs[:avail] )
174
+ # print(self._perms)
175
+
176
+ def _which(self, i):
177
+ pos, idx = super()._which(i)
178
+ return pos, self._perms[pos][idx]
179
+
180
+
181
+ class UnsupervisedPairs (ImagePairs):
182
+ """ Unsupervised image pairs obtained from SfM
183
+ """
184
+ def __init__(self, img_set, pair_file_path):
185
+ assert isinstance(img_set, ImageSet), bb()
186
+ self.pair_list = self._parse_pair_list(pair_file_path)
187
+ self.corres_dir = osp.join(osp.split(pair_file_path)[0], 'corres')
188
+
189
+ tag_to_idx = {n:i for i,n in enumerate(img_set.imgs)}
190
+ img_indices = lambda pair: tuple([tag_to_idx[n] for n in pair])
191
+ super().__init__(img_set, [img_indices(pair) for pair in self.pair_list])
192
+
193
+ def __repr__(self):
194
+ return f"{type(self).__name__}({len(self)} pairs from {self.imgs})"
195
+
196
+ def _parse_pair_list(self, pair_file_path):
197
+ res = []
198
+ for row in open(pair_file_path).read().splitlines():
199
+ row = row.split()
200
+ if len(row) != 2: raise IOError()
201
+ res.append((row[0], row[1]))
202
+ return res
203
+
204
+ def get_corres_path(self, pair_idx):
205
+ img1, img2 = [osp.basename(self.imgs.imgs[i]) for i in self.pairs[pair_idx]]
206
+ return osp.join(self.corres_dir, f'{img1}_{img2}.npy')
207
+
208
+ def get_corres(self, pair_idx):
209
+ return np.load(self.get_corres_path(pair_idx))
210
+
211
+ def __getitem__(self, idx):
212
+ img1, img2 = self._load_pair(idx)
213
+ return (img1, img2), dict(corres=self.get_corres(idx))
214
+
215
+
216
+ if __name__ == '__main__':
217
+ from datasets import *
218
+ from tools.viz import show_random_pairs
219
+
220
+ db = BalancedCatImagePairs(
221
+ 3125, SyntheticImagePairs(RandomWebImages(0,52),distort='RandomTilting(0.5)'),
222
+ 4875, SyntheticImagePairs(SfM120k_Images(),distort='RandomTilting(0.5)'),
223
+ 8000, SfM120k_Pairs())
224
+
225
+ show_random_pairs(db)
226
+
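
As a usage sketch (illustrative only), `SyntheticImagePairs` can be pointed at the demo images shipped in this commit. Each item returns the two images plus a ground-truth homography mapping img1 pixel coordinates to img2, which can be checked with `applyh` from `datasets.utils`; the seed and distortion string below are arbitrary.

    import numpy as np
    from datasets.image_set import ImageSet
    from datasets.pair_dataset import SyntheticImagePairs
    from datasets.utils import applyh

    imset = ImageSet.from_folder('datasets/demo_warp')
    db = SyntheticImagePairs(imset, distort='RandomTilting(0.5)', seed=0)

    (img1, img2), gt = db[0]
    H = gt['homography']                               # 3x3, img1 coords -> img2 coords
    center = np.float32([[img1.size[0] / 2, img1.size[1] / 2]])
    print(applyh(H, center))                           # where img1's center lands in img2
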
datasets/pair_loader.py ADDED
@@ -0,0 +1,291 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ from PIL import Image
7
+ import numpy as np
8
+
9
+ from core import functional as myF
10
+ from tools.common import todevice
11
+ from .transforms import instanciate_transforms
12
+ from .utils import *
13
+
14
+
15
+ class FastPairLoader (DatasetWithRng):
16
+ """ On-the-fly generation of related image pairs
17
+ crop: random crop applied to both images
18
+ scale: random scaling applied to img2
19
+ distort: random distortion applied to img2
20
+
21
+ self[idx] returns: (img1, img2), dict(homography=)
22
+ (homography: 3x3 array, can be nan)
23
+ """
24
+ def __init__(self, dataset, crop=256, transform='', p_flip=0, p_swap=0, scale_jitter=0, seed=None):
25
+ super().__init__(seed)
26
+ self.dataset = self.with_same_rng(dataset)
27
+ self.transform = instanciate_transforms( transform, rng=self.rng )
28
+ self.crop_size = crop
29
+ self.p_swap = p_swap
30
+ self.p_flip = p_flip
31
+ self.scale_jitter = abs(np.log1p(scale_jitter))
32
+
33
+ def __len__(self):
34
+ return len(self.dataset)
35
+
36
+ def __repr__(self):
37
+ fmt_str = f'FastPairLoader({self.dataset},\n'
38
+ short_repr = lambda s: repr(s).strip().replace('\n',', ')[14:-1].replace(' ',' ')
39
+ fmt_str += ' Transform:\t%s\n' % short_repr(self.transform)
40
+ fmt_str +=f' Crop={self.crop_size}, scale_jitter=x{np.exp(self.scale_jitter):g}, p_swap={self.p_swap:g}'
41
+ return fmt_str
42
+
43
+ def init_worker(self, tid):
44
+ super().init_worker(tid)
45
+ self.dataset.init_worker(tid)
46
+
47
+ def set_epoch(self, epoch):
48
+ self.dataset.set_epoch(epoch)
49
+
50
+ def __getitem__(self, idx):
51
+ self.init_worker(idx) # preserve RNG for this pair
52
+ (img1, img2), gt = self.dataset[idx]
53
+
54
+ if self.rng.random() < self.p_swap:
55
+ img1, img2 = img2, img1
56
+ if 'homography' in gt: gt['homography'] = invh(gt['homography'])
57
+ if 'corres' in gt: gt['corres'] = swap_corres(gt['corres'])
58
+
59
+ if self.rng.random() < self.p_flip:
60
+ img1, img2, gt = flip_image_pair(img1, img2, gt)
61
+
62
+ # apply transformations to the second image
63
+ img2 = self.transform(dict(img=img2))
64
+
65
+ homography, corres = spatial_relationship( img1, img2, gt )
66
+
67
+ # find a good window
68
+ img1, img2 = map(self._pad_rgb_numpy, (img1, img2['img']))
69
+
70
+ if not 'debug':
71
+ from tools.viz import show_correspondences
72
+ print(np.median(corres[:,5]))
73
+ show_correspondences(img1, img2, corres, bb=bb)
74
+
75
+ def windows_from_corres( idx, scale_jitter=1 ):
76
+ c = corres[idx]
77
+ p1, p2, scale = c[0:2], c[2:4], c[6]
78
+ scale *= scale_jitter
79
+
80
+ # make windows based on scaling
81
+ win1 = window(*p1, self.crop_size, max(1, 1/scale), img1.shape)
82
+ win2 = window(*p2, self.crop_size, max(1, scale/1), img2.shape)
83
+ return win1, win2
84
+
85
+ best = 0, None
86
+ for idx in self.rng.choice(len(corres), size=min(len(corres),5), replace=False):
87
+ # pick a correspondence at random
88
+ win1, win2 = windows_from_corres( idx )
89
+
90
+ # check how many matches are in the 2 windows
91
+ score = score_windows(is_in(corres[:,0:2],win1), is_in(corres[:,2:4],win2))
92
+ if score > best[0]: best = score, idx
93
+
94
+ others = {}
95
+ if None in best: # couldn't find a good window
96
+ img1 = img2 = np.zeros((self.crop_size,self.crop_size,3), dtype=np.uint8)
97
+ corres = np.empty((0, 6), dtype=np.float32)
98
+ else:
99
+ # jitter scales
100
+ scale_jitter = np.exp(self.rng.uniform(-self.scale_jitter, self.scale_jitter))
101
+ win1, win2 = windows_from_corres( best[1], scale_jitter )
102
+ # print(win1, win2, img1.shape, img2.shape)
103
+ img1, img2 = imresize(img1[win1], self.crop_size), imresize(img2[win2], self.crop_size)
104
+ trf1, trf2 = wintrf(win1, img1), wintrf(win2, img2)
105
+
106
+ # fix rotation if necessary
107
+ angle_scores = np.bincount(corres[:,5].astype(int) % 8)
108
+ rot90 = int((((angle_scores.argmax() + 4) % 8) - 4) / 2)
109
+ if rot90: # rectify rotation
110
+ img2, trf = myF.rotate_img_90((img2, np.eye(3)), 90*rot90)
111
+ trf2 = invh(trf) @ trf2
112
+
113
+ homography = trf2 @ homography @ invh(trf1)
114
+ corres = myF.affmul((trf1,trf2), corres)
115
+
116
+ f32c = lambda i,**kw: np.require(i, requirements='CWAE', **kw)
117
+ return (f32c(img1), f32c(img2)), dict(homography = f32c(homography, dtype=np.float32), corres=corres, **others)
118
+
119
+ def _pad_rgb_numpy(self, img):
120
+ if img.mode != 'RGB':
121
+ img = img.convert('RGB')
122
+ if min(img.size) < self.crop_size:
123
+ w, h = img.size
124
+ result = Image.new('RGB', (max(w,self.crop_size), max(h,self.crop_size)), 0)
125
+ result.paste(img, (0, 0))
126
+ img = result
127
+ return np.asarray(img)
128
+
129
+
130
+
131
+ def swap_corres( corres ): # swap img1 and img2
132
+ res = corres.copy()
133
+ res[:,[0,1,2,3]] = corres[:,[2,3,0,1]]
134
+ if corres.shape[1] > 4: # invert rotation and scale
135
+ scale, rot = myF.decode_scale_rot(corres[:,5])
136
+ res[:,5] = myF.encode_scale_rot(1/scale, -rot)
137
+ return res
138
+
139
+ def flip(img):
140
+ w, h = img.size
141
+ return img.transpose(Image.FLIP_LEFT_RIGHT), np.float32( [[-1,0,w-1],[0,1,0],[0,0,1]] )
142
+
143
+ def flip_image_pair(img1, img2, gt):
144
+ img1, F1 = flip(img1)
145
+ img2, F2 = flip(img2)
146
+ res = {}
147
+ for key, value in gt.items():
148
+ if key == 'homography':
149
+ res['homography'] = F2 @ value @ F1
150
+ elif key == 'aflow':
151
+ assert False, 'flip for aflow: todo'
152
+ elif key == 'corres':
153
+ new_corres = np.c_[applyh(F1,value[:,0:2]), applyh(F2,value[:,2:4])]
154
+ if value.shape[1] == 4: pass
155
+ elif value.shape[1] == 6:
156
+ scale, rot = myF.decode_scale_rot(value[:,5])
157
+ new_code = myF.encode_scale_rot(scale, -rot)
158
+ new_corres = np.c_[new_corres,value[:,4],new_code]
159
+ res['corres'] = new_corres
160
+ else:
161
+ raise ValueError(f"flip_image_pair: bad gt field '{key}'")
162
+ return img1, img2, res
163
+
164
+
165
+ def spatial_relationship( img1, img2, gt ):
166
+ if 'homography' in gt:
167
+ homography = gt['homography']
168
+ if 'homography' in img2:
169
+ homography = np.float32(img2['homography']) @ homography
170
+ corres = corres_from_homography(homography, *img1.size)
171
+
172
+ elif 'corres' in gt:
173
+ homography = np.full((3,3), np.nan, dtype=np.float32)
174
+ corres = gt['corres']
175
+ if 'homography' in img2:
176
+ corres[:,2:4] = applyh(img2['homography'], corres[:,2:4])
177
+ else:
178
+ img2['homography'] = np.eye(3)
179
+ scales = np.sqrt(np.abs(np.linalg.det(jacobianh(img2['homography'], corres[:,0:2]).T)))
180
+
181
+ if corres.shape[1] == 4:
182
+ scales, rots = scale_rot_from_corres(corres)
183
+ corres = np.c_[corres, np.ones_like(scales), myF.encode_scale_rot(scales,rots*180/np.pi), scales]
184
+ elif corres.shape[1] == 6:
185
+ corres = np.c_[corres, scales * myF.decode_scale_rot(corres[:,5])[0]]
186
+ else:
187
+ assert ValueError(f'bad shape for corres: {corres.shape}')
188
+
189
+ return homography, corres
190
+
191
+
192
+ def scale_rot_from_corres( corres, sub=256, nn=16 ):
193
+ # select a subset of relevant correspondences
194
+ sub = np.random.choice(len(corres), size=min(len(corres),sub), replace=False)
195
+ sub = corres[sub]
196
+
197
+ # for each corres, find the scale change w.r.t. its NNs
198
+ from scipy.spatial.distance import cdist
199
+ nns = cdist(corres, sub, metric='sqeuclidean').argsort(axis=1)[:,:nn]
200
+
201
+ # affine transform for this set of neighboring correspondences
202
+ pts = sub[nns] # shape = npts x sub x 4
203
+ # [P1,1] @ A = P2 with A = 3x2 matrix
204
+ # A = [P1,1]^-1 @ P2
205
+ P1, P2 = pts[:,:,0:2], pts[:,:,2:4] # each row = list of correspondences
206
+ P1 = np.concatenate((P1,np.ones_like(P1[:,:,:1])),axis=-1)
207
+ A = (np.linalg.pinv(P1) @ P2).transpose(0,2,1)
208
+
209
+ scale, (angy,angx) = detect_scale_rotation(A.transpose(1,2,0)[:,1::-1])
210
+ rot = np.arctan2(angy, angx)
211
+ return scale.clip(min=0.2, max=5), rot
212
+
213
+
214
+ def window1(x, size, w):
215
+ l = x - int(0.5 + size / 2)
216
+ r = l + int(0.5 + size)
217
+ if l < 0: l,r = (0, r - l)
218
+ if r > w: l,r = (l + w - r, w)
219
+ if l < 0: l,r = 0,w # larger than width
220
+ return slice(l,r)
221
+
222
+ def window(cx, cy, win_size, scale, img_shape):
223
+ return (window1(int(cy), win_size*scale, img_shape[0]),
224
+ window1(int(cx), win_size*scale, img_shape[1]))
225
+
226
+ def is_in( pts, window ):
227
+ x, y = pts.T
228
+ sly, slx = window
229
+ return (slx.start <= x) & (x < slx.stop) & (sly.start <= y) & (y < sly.stop)
230
+
231
+ def score_windows( valid1, valid2 ):
232
+ inter = (valid1 & valid2).sum()
233
+ iou1 = inter / (valid1.sum() + 1e-8)
234
+ iou2 = inter / (valid2.sum() + 1e-8)
235
+ return inter * min(iou1, iou2)
236
+
237
+ def imresize( img, max_size, resample=Image.ANTIALIAS):
238
+ if max(img.shape[:2]) > max_size:
239
+ if img.shape[-1] == 2:
240
+ img = np.stack([np.float32(Image.fromarray(img[...,i]).resize((max_size,max_size), resample=resample)) for i in range(2)], axis=-1)
241
+ else:
242
+ img = np.asarray(Image.fromarray(img).resize((max_size,max_size), resample=resample))
243
+ assert img.shape[0] == img.shape[1] == max_size, bb()
244
+ return img
245
+
246
+ def wintrf( window, final_img ):
247
+ wy, wx = window
248
+ H, W = final_img.shape[:2]
249
+ T = np.float32((((wx.stop-wx.start)/W, 0, wx.start),
250
+ (0, (wy.stop-wy.start)/H, wy.start),
251
+ (0, 0, 1)) )
252
+ return invh(T)
253
+
254
+
255
+ def collate_ordered(batch, _use_shared_memory=True):
256
+ pairs, gt = zip(*batch)
257
+ imgs1, imgs2 = zip(*pairs)
258
+ assert len(imgs1) == len(imgs2) == len(gt) and isinstance(gt[0], dict)
259
+
260
+ # reorder samples (supervised ones first, unsupervised ones last)
261
+ supervised = [i for i,b in enumerate(gt) if np.isfinite(b['homography']).all()]
262
+ unsupervsd = [i for i,b in enumerate(gt) if np.isnan(b['homography']).any()]
263
+ order = supervised + unsupervsd
264
+
265
+ def collate( tensors, key=None ):
266
+ import torch
267
+ batch = todevice([tensors[i] for i in order], 'cpu')
268
+ if key == 'corres': return batch # cannot concat
269
+ if _use_shared_memory: # shared memory tensor to avoid an extra copy
270
+ numel = sum([x.numel() for x in batch])
271
+ storage = batch[0].storage()._new_shared(numel)
272
+ out = batch[0].new(storage)
273
+ return torch.stack(batch, dim=0, out=out)
274
+
275
+ return (collate(imgs1), collate(imgs2)), {k:collate([b[k] for b in gt],k) for k in gt[0]}
276
+
277
+
278
+ if __name__ == '__main__':
279
+ from datasets import *
280
+ from tools.viz import show_random_pairs
281
+
282
+ db = BalancedCatImagePairs(
283
+ 3125, SyntheticImagePairs(RandomWebImages(0,52),distort='RandomTilting(0.5)'),
284
+ 4875, SyntheticImagePairs(SfM120k_Images(),distort='RandomTilting(0.5)'),
285
+ 8000, SfM120k_Pairs())
286
+
287
+ db = FastPairLoader(db,
288
+ crop=256, transform='RandomRotation(20), RandomScale(256,1536,ar=1.3,can_upscale=True), PixelNoise()',
289
+ p_swap=0.5, p_flip=0.5, scale_jitter=0, seed=777)
290
+
291
+ show_random_pairs(db)
datasets/sfm120k.py ADDED
@@ -0,0 +1,27 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ from os.path import *
7
+
8
+ from .image_set import ImageSet
9
+ from .pair_dataset import UnsupervisedPairs
10
+
11
+
12
+ class SfM120k_Images (ImageSet):
13
+ def __init__(self, root='datasets/sfm120k'):
14
+ self.init_from_folder(join(root,'ims'), recursive=True, listing=True, exts='')
15
+
16
+
17
+ class SfM120k_Pairs (UnsupervisedPairs):
18
+ def __init__(self, root='datasets/sfm120k'):
19
+ super().__init__(SfM120k_Images(root=root), join(root,'list_pairs.txt'))
20
+
21
+
22
+ if __name__ == '__main__':
23
+ from tools.viz import show_random_pairs
24
+
25
+ db = SfM120k_Pairs()
26
+
27
+ show_random_pairs(db)
datasets/transforms.py ADDED
@@ -0,0 +1,540 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import warnings
7
+
8
+ import numpy as np
9
+ from PIL import Image, ImageOps
10
+
11
+ import torch
12
+ import torch.nn as nn
13
+ from torchvision import transforms as tvf
14
+
15
+ from . import transforms_tools as F
16
+ from .utils import DatasetWithRng
17
+
18
+ '''
19
+ Example command to try out some transformation chain:
20
+
21
+ python -m datasets.transforms --trfs "Scale(384), ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1), RandomRotation(10), RandomTilting(0.5, 'all'), RandomScale(240,320), RandomCrop(224)"
22
+ '''
23
+
24
+ def instanciate_transforms(transforms, use_gpu=False, rng=None, compose=True):
25
+ ''' Instantiate a sequence of transformations.
26
+
27
+ transforms: (str, list)
28
+ Comma-separated list of transformations.
29
+ Ex: "Rotate(10), Scale(256)"
30
+ '''
31
+ try:
32
+ transforms = transforms or '[]'
33
+
34
+ if isinstance(transforms, str):
35
+ if transforms.lstrip()[0] not in '[(': transforms = f'[{transforms}]'
36
+ if compose: transforms = f'Compose({transforms})'
37
+ transforms = eval(transforms)
38
+
39
+ if isinstance(transforms, list) and transforms and isinstance(transforms[0], str):
40
+ transforms = [eval(trf) for trf in transforms]
41
+ if compose: transforms = Compose(transforms)
42
+
43
+ if use_gpu and not isinstance(transforms, nn.Module):
44
+ while hasattr(transforms,'transforms') or hasattr(transforms,'transform'):
45
+ transforms = getattr(transforms,'transforms',getattr(transforms,'transform',None))
46
+ transforms = [trf for trf in transforms if isinstance(trf, nn.Module)]
47
+ transforms = nn.Sequential(*transforms) if compose else nn.ModuleList(transforms)
48
+
49
+ if transforms and rng:
50
+ for trf in transforms.transforms:
51
+ assert hasattr(trf, 'rng'), f"Transformation {trf} has no self.rng"
52
+ trf.rng = rng
53
+
54
+ if isinstance(transforms, Compose) and len(transforms.transforms) == 1:
55
+ transforms = transforms.transforms[0]
56
+ return transforms
57
+
58
+ except Exception as e:
59
+ print("\nError: Cannot interpret this transform list: %s\n" % transforms)
60
+ raise e
61
+
62
+
63
+
64
+ class Compose (DatasetWithRng):
65
+ def __init__(self, transforms, **rng_seed):
66
+ super().__init__(**rng_seed)
67
+ self.transforms = [self.with_same_rng(trf) for trf in transforms]
68
+
69
+ def __call__(self, data):
70
+ for trf in self.transforms:
71
+ data = trf(data)
72
+ return data
73
+
74
+
75
+ class Scale (DatasetWithRng):
76
+ """ Rescale the input PIL.Image to a given size.
77
+ Copied from https://github.com/pytorch in torchvision/transforms/transforms.py
78
+
79
+ The smallest dimension of the resulting image will be = size.
80
+
81
+ if largest == True: same behaviour for the largest dimension.
82
+
83
+ if not can_upscale: don't upscale
84
+ if not can_downscale: don't downscale
85
+ """
86
+ def __init__(self, size, interpolation=Image.BILINEAR, largest=False,
87
+ can_upscale=True, can_downscale=True, **rng_seed):
88
+ super().__init__(**rng_seed)
89
+ assert isinstance(size, int) or (len(size) == 2)
90
+ self.size = size
91
+ self.interpolation = interpolation
92
+ self.largest = largest
93
+ self.can_upscale = can_upscale
94
+ self.can_downscale = can_downscale
95
+
96
+ def __repr__(self):
97
+ fmt_str = "RandomScale(%s" % str(self.size)
98
+ if self.largest: fmt_str += ', largest=True'
99
+ if not self.can_upscale: fmt_str += ', can_upscale=False'
100
+ if not self.can_downscale: fmt_str += ', can_downscale=False'
101
+ return fmt_str+')'
102
+
103
+ def get_params(self, imsize):
104
+ w,h = imsize
105
+ if isinstance(self.size, int):
106
+ cmp = lambda a,b: (a>=b) if self.largest else (a<=b)
107
+ if (cmp(w, h) and w == self.size) or (cmp(h, w) and h == self.size):
108
+ ow, oh = w, h
109
+ elif cmp(w, h):
110
+ ow = self.size
111
+ oh = int(self.size * h / w)
112
+ else:
113
+ oh = self.size
114
+ ow = int(self.size * w / h)
115
+ else:
116
+ ow, oh = self.size
117
+ return ow, oh
118
+
119
+ def __call__(self, inp):
120
+ img = F.grab(inp,'img')
121
+ w, h = img.size
122
+
123
+ size2 = ow, oh = self.get_params(img.size)
124
+
125
+ if size2 != img.size:
126
+ a1, a2 = img.size, size2
127
+ if (self.can_upscale and min(a1) < min(a2)) or (self.can_downscale and min(a1) > min(a2)):
128
+ img = img.resize(size2, self.interpolation)
129
+
130
+ return F.update(inp, img=img, homography=np.diag((ow/w,oh/h,1)))
131
+
132
+
133
+
134
+ class RandomScale (Scale):
135
+ """Rescale the input PIL.Image to a random size.
136
+ Copied from https://github.com/pytorch in torchvision/transforms/transforms.py
137
+
138
+ Args:
139
+ min_size (int): min size of the smaller edge of the picture.
140
+ max_size (int): max size of the smaller edge of the picture.
141
+
142
+ ar (float or tuple):
143
+ max change of aspect ratio (width/height).
144
+
145
+ interpolation (int, optional): Desired interpolation. Default is
146
+ ``PIL.Image.BILINEAR``
147
+ """
148
+
149
+ def __init__(self, min_size, max_size, ar=1, larger=False,
150
+ can_upscale=False, can_downscale=True, interpolation=Image.BILINEAR):
151
+ Scale.__init__(self, (min_size,max_size), can_upscale=can_upscale, can_downscale=can_downscale, interpolation=interpolation)
152
+ assert type(min_size) == type(max_size), 'min_size and max_size can only be 2 ints or 2 floats'
153
+ assert isinstance(min_size, int) and min_size >= 1 or isinstance(min_size, float) and min_size>0
154
+ assert isinstance(max_size, (int,float)) and min_size <= max_size
155
+ self.min_size = min_size
156
+ self.max_size = max_size
157
+ if type(ar) in (float,int): ar = (min(1/ar,ar),max(1/ar,ar))
158
+ assert 0.2 < ar[0] <= ar[1] < 5
159
+ self.ar = ar
160
+ self.larger = larger
161
+
162
+ def get_params(self, imsize):
163
+ w,h = imsize
164
+ if isinstance(self.min_size, float): min_size = int(self.min_size*min(w,h) + 0.5)
165
+ if isinstance(self.max_size, float): max_size = int(self.max_size*min(w,h) + 0.5)
166
+ if isinstance(self.min_size, int): min_size = self.min_size
167
+ if isinstance(self.max_size, int): max_size = self.max_size
168
+
169
+ if not(self.can_upscale) and not(self.larger):
170
+ max_size = min(max_size,min(w,h))
171
+
172
+ size = int(0.5 + F.rand_log_uniform(self.rng, min_size, max_size))
173
+ if not(self.can_upscale) and self.larger:
174
+ size = min(size, min(w,h))
175
+
176
+ ar = F.rand_log_uniform(self.rng, *self.ar) # change of aspect ratio
177
+
178
+ if w < h: # image is taller
179
+ ow = size
180
+ oh = int(0.5 + size * h / w / ar)
181
+ if oh < min_size:
182
+ ow,oh = int(0.5 + ow*float(min_size)/oh),min_size
183
+ else: # image is wider
184
+ oh = size
185
+ ow = int(0.5 + size * w / h * ar)
186
+ if ow < min_size:
187
+ ow,oh = min_size,int(0.5 + oh*float(min_size)/ow)
188
+
189
+ assert ow >= min_size, 'image too small (width=%d < min_size=%d)' % (ow, min_size)
190
+ assert oh >= min_size, 'image too small (height=%d < min_size=%d)' % (oh, min_size)
191
+ return ow, oh
192
+
193
+
194
+
195
+ class RandomCrop (DatasetWithRng):
196
+ """Crop the given PIL Image at a random location.
197
+ Copied from https://github.com/pytorch in torchvision/transforms/transforms.py
198
+
199
+ Args:
200
+ size (sequence or int): Desired output size of the crop. If size is an
201
+ int instead of sequence like (h, w), a square crop (size, size) is
202
+ made.
203
+ padding (int or sequence, optional): Optional padding on each border
204
+ of the image. Default is 0, i.e no padding. If a sequence of length
205
+ 4 is provided, it is used to pad left, top, right, bottom borders
206
+ respectively.
207
+ """
208
+
209
+ def __init__(self, size, padding=0, **rng_seed):
210
+ super().__init__(**rng_seed)
211
+ if isinstance(size, int):
212
+ self.size = (int(size), int(size))
213
+ else:
214
+ self.size = size
215
+ self.padding = padding
216
+
217
+ def __repr__(self):
218
+ return "RandomCrop(%s)" % str(self.size)
219
+
220
+ def get_params(self, img, output_size):
221
+ w, h = img.size
222
+ th, tw = output_size
223
+ assert h >= th and w >= tw, "Image of %dx%d is too small for crop %dx%d" % (w,h,tw,th)
224
+
225
+ y = self.rng.integers(0, h - th) if h > th else 0
226
+ x = self.rng.integers(0, w - tw) if w > tw else 0
227
+ return x, y, tw, th
228
+
229
+ def __call__(self, inp):
230
+ img = F.grab(inp,'img')
231
+
232
+ padl = padt = 0
233
+ if self.padding:
234
+ if F.is_pil_image(img):
235
+ img = ImageOps.expand(img, border=self.padding, fill=0)
236
+ else:
237
+ assert isinstance(img, F.DummyImg)
238
+ img = img.expand(border=self.padding)
239
+ if isinstance(self.padding, int):
240
+ padl = padt = self.padding
241
+ else:
242
+ padl, padt = self.padding[0:2]
243
+
244
+ i, j, tw, th = self.get_params(img, self.size)
245
+ img = img.crop((i, j, i+tw, j+th))
246
+
247
+ return F.update(inp, img=img, homography=np.float32(((1,0,padl-i),(0,1,padt-j),(0,0,1))))
248
+
249
+
250
+ class CenterCrop (RandomCrop):
251
+ """Crops the given PIL Image at the center.
252
+ Copied from https://github.com/pytorch in torchvision/transforms/transforms.py
253
+
254
+ Args:
255
+ size (sequence or int): Desired output size of the crop. If size is an
256
+ int instead of sequence like (h, w), a square crop (size, size) is
257
+ made.
258
+ """
259
+ @staticmethod
260
+ def get_params(img, output_size):
261
+ w, h = img.size
262
+ th, tw = output_size
263
+ y = int(0.5 +((h - th) / 2.))
264
+ x = int(0.5 +((w - tw) / 2.))
265
+ return x, y, tw, th
266
+
267
+
268
+ class RandomRotation (DatasetWithRng):
269
+ """Rescale the input PIL.Image to a random size.
270
+ Copied from https://github.com/pytorch in torchvision/transforms/transforms.py
271
+
272
+ Args:
273
+ degrees (float):
274
+ maximum rotation angle; the actual angle is drawn uniformly in [-degrees, degrees].
275
+
276
+ interpolation (int, optional): Desired interpolation. Default is
277
+ ``PIL.Image.BILINEAR``
278
+ """
279
+
280
+ def __init__(self, degrees, interpolation=Image.BILINEAR, **rng_seed):
281
+ super().__init__(**rng_seed)
282
+ self.degrees = degrees
283
+ self.interpolation = interpolation
284
+
285
+ def __repr__(self):
286
+ return f"RandomRotation({self.degrees})"
287
+
288
+ def __call__(self, inp):
289
+ img = F.grab(inp,'img')
290
+ w, h = img.size
291
+
292
+ angle = self.rng.uniform(-self.degrees, self.degrees)
293
+
294
+ img = img.rotate(angle, resample=self.interpolation)
295
+ w2, h2 = img.size
296
+
297
+ trf = F.translate(w2/2,h2/2) @ F.rotate(-angle * np.pi/180) @ F.translate(-w/2,-h/2)
298
+ return F.update(inp, img=img, homography=trf)
299
+
300
+
301
+ class RandomTilting (DatasetWithRng):
302
+ """Apply a random tilting (left, right, up, down) to the input PIL.Image
303
+ Copied from https://github.com/pytorch in torchvision/transforms/transforms.py
304
+
305
+ Args:
306
+ magnitude (float):
307
+ maximum magnitude of the random skew (value between 0 and 1)
308
+ directions (string):
309
+ tilting directions allowed (all, left, right, up, down)
310
+ examples: "all", "left,right", "up-down-right"
311
+ """
312
+
313
+ def __init__(self, magnitude, directions='all', **rng_seed):
314
+ super().__init__(**rng_seed)
315
+ self.magnitude = magnitude
316
+ self.directions = directions.lower().replace(',',' ').replace('-',' ')
317
+
318
+ def __repr__(self):
319
+ return "RandomTilt(%g, '%s')" % (self.magnitude,self.directions)
320
+
321
+ def __call__(self, inp):
322
+ img = F.grab(inp,'img')
323
+ w, h = img.size
324
+
325
+ x1,y1,x2,y2 = 0,0,h,w
326
+ original_plane = [(y1, x1), (y2, x1), (y2, x2), (y1, x2)]
327
+
328
+ max_skew_amount = max(w, h)
329
+ max_skew_amount = int(np.ceil(max_skew_amount * self.magnitude))
330
+ skew_amount = self.rng.integers(1, max_skew_amount)
331
+
332
+ if self.directions == 'all':
333
+ choices = [0,1,2,3]
334
+ else:
335
+ dirs = ['left', 'right', 'up', 'down']
336
+ choices = []
337
+ for d in self.directions.split():
338
+ try:
339
+ choices.append(dirs.index(d))
340
+ except:
341
+ raise ValueError('Tilting direction %s not recognized' % d)
342
+
343
+ skew_direction = self.rng.choice(choices)
344
+
345
+ # print('randomtitlting: ', skew_amount, skew_direction) # to debug random
346
+
347
+ if skew_direction == 0:
348
+ # Left Tilt
349
+ new_plane = [(y1, x1 - skew_amount), # Top Left
350
+ (y2, x1), # Top Right
351
+ (y2, x2), # Bottom Right
352
+ (y1, x2 + skew_amount)] # Bottom Left
353
+ elif skew_direction == 1:
354
+ # Right Tilt
355
+ new_plane = [(y1, x1), # Top Left
356
+ (y2, x1 - skew_amount), # Top Right
357
+ (y2, x2 + skew_amount), # Bottom Right
358
+ (y1, x2)] # Bottom Left
359
+ elif skew_direction == 2:
360
+ # Forward Tilt
361
+ new_plane = [(y1 - skew_amount, x1), # Top Left
362
+ (y2 + skew_amount, x1), # Top Right
363
+ (y2, x2), # Bottom Right
364
+ (y1, x2)] # Bottom Left
365
+ elif skew_direction == 3:
366
+ # Backward Tilt
367
+ new_plane = [(y1, x1), # Top Left
368
+ (y2, x1), # Top Right
369
+ (y2 + skew_amount, x2), # Bottom Right
370
+ (y1 - skew_amount, x2)] # Bottom Left
371
+
372
+ # To calculate the coefficients required by PIL for the perspective skew,
373
+ # see the following Stack Overflow discussion: https://goo.gl/sSgJdj
374
+ homography = F.homography_from_4pts(original_plane, new_plane)
375
+ img = img.transform(img.size, Image.PERSPECTIVE, homography, resample=Image.BICUBIC)
376
+
377
+ homography = np.linalg.pinv(np.float32(homography+(1,)).reshape(3,3))
378
+ return F.update(inp, img=img, homography=homography)
379
+
380
+
381
+ RandomHomography = RandomTilt = RandomTilting # redefinition
382
+
383
+
384
+ class Homography(object):
385
+ """Apply a known tilting to an image
386
+ """
387
+ def __init__(self, *homography):
388
+ assert len(homography) == 8
389
+ self.homography = homography
390
+
391
+ def __call__(self, inp):
392
+ img = F.grab(inp, 'img')
393
+ homography = self.homography
394
+
395
+ img = img.transform(img.size, Image.PERSPECTIVE, homography, resample=Image.BICUBIC)
396
+
397
+ homography = np.linalg.pinv(np.float32(list(homography)+[1]).reshape(3,3))
398
+ return F.update(inp, img=img, homography=homography)
399
+
400
+
401
+
402
+ class StillTransform (DatasetWithRng):
403
+ """ Takes and return an image, without changing its shape or geometry.
404
+ """
405
+ def _transform(self, img):
406
+ raise NotImplementedError()
407
+
408
+ def __call__(self, inp):
409
+ img = F.grab(inp,'img')
410
+
411
+ # transform the image (size should not change)
412
+ try:
413
+ img = self._transform(img)
414
+ except TypeError:
415
+ pass
416
+
417
+ return F.update(inp, img=img)
418
+
419
+
420
+
421
+ class PixelNoise (StillTransform):
422
+ """ Takes an image, and add random white noise.
423
+ """
424
+ def __init__(self, ampl=20, **rng_seed):
425
+ super().__init__(**rng_seed)
426
+ assert 0 <= ampl < 255
427
+ self.ampl = ampl
428
+
429
+ def __repr__(self):
430
+ return "PixelNoise(%g)" % self.ampl
431
+
432
+ def _transform(self, img):
433
+ img = np.float32(img)
434
+ img += self.rng.uniform(0.5-self.ampl/2, 0.5+self.ampl/2, size=img.shape)
435
+ return Image.fromarray(np.uint8(img.clip(0,255)))
436
+
437
+
438
+
439
+ class ColorJitter (StillTransform):
440
+ """Randomly change the brightness, contrast and saturation of an image.
441
+ Copied from https://github.com/pytorch in torchvision/transforms/transforms.py
442
+
443
+ Args:
444
+ brightness (float): How much to jitter brightness. brightness_factor
445
+ is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
446
+ contrast (float): How much to jitter contrast. contrast_factor
447
+ is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
448
+ saturation (float): How much to jitter saturation. saturation_factor
449
+ is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
450
+ hue(float): How much to jitter hue. hue_factor is chosen uniformly from
451
+ [-hue, hue]. Should be >=0 and <= 0.5.
452
+ """
453
+ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, **rng_seed):
454
+ super().__init__(**rng_seed)  # initialize self.rng, as the other transforms do
+ self.brightness = brightness
455
+ self.contrast = contrast
456
+ self.saturation = saturation
457
+ self.hue = hue
458
+
459
+ def __repr__(self):
460
+ return "ColorJitter(%g,%g,%g,%g)" % (
461
+ self.brightness, self.contrast, self.saturation, self.hue)
462
+
463
+ def get_params(self, brightness, contrast, saturation, hue):
464
+ """Get a randomized transform to be applied on image.
465
+ Arguments are same as that of __init__.
466
+ Returns:
467
+ Transform which randomly adjusts brightness, contrast and
468
+ saturation in a random order.
469
+ """
470
+ transforms = []
471
+ if brightness > 0:
472
+ brightness_factor = self.rng.uniform(max(0, 1 - brightness), 1 + brightness)
473
+ transforms.append(tvf.Lambda(lambda img: F.adjust_brightness(img, brightness_factor)))
474
+
475
+ if contrast > 0:
476
+ contrast_factor = self.rng.uniform(max(0, 1 - contrast), 1 + contrast)
477
+ transforms.append(tvf.Lambda(lambda img: F.adjust_contrast(img, contrast_factor)))
478
+
479
+ if saturation > 0:
480
+ saturation_factor = self.rng.uniform(max(0, 1 - saturation), 1 + saturation)
481
+ transforms.append(tvf.Lambda(lambda img: F.adjust_saturation(img, saturation_factor)))
482
+
483
+ if hue > 0:
484
+ hue_factor = self.rng.uniform(-hue, hue)
485
+ transforms.append(tvf.Lambda(lambda img: F.adjust_hue(img, hue_factor)))
486
+
487
+ # print('colorjitter: ', brightness_factor, contrast_factor, saturation_factor, hue_factor) # to debug random seed
488
+ self.rng.shuffle(transforms)
489
+ transform = tvf.Compose(transforms)
490
+ return transform
491
+
492
+ def _transform(self, img):
493
+ transform = self.get_params(self.brightness, self.contrast, self.saturation, self.hue)
494
+ return transform(img)
495
+
496
+
497
+ def pil_loader(path, mode='RGB'):
498
+ with warnings.catch_warnings():
499
+ warnings.simplefilter("ignore")
500
+ # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
501
+ with (path if hasattr(path,'read') else open(path, 'rb')) as f:
502
+ img = Image.open(f)
503
+ return img.convert(mode)
504
+
505
+ def torchvision_loader(path, mode='RGB'):
506
+ from torchvision.io import read_file, decode_image, read_image, image
507
+ return read_image(getattr(path,'name',path), mode=getattr(image.ImageReadMode,mode))
508
+
509
+
510
+
511
+ if __name__ == '__main__':
512
+ from matplotlib import pyplot as pl
513
+ import argparse
514
+ from time import time as now  # used for timing in the loop below
515
+ parser = argparse.ArgumentParser("Script to try out and visualize transformations")
516
+ parser.add_argument('--img', type=str, default='imgs/test.png', help='input image')
517
+ parser.add_argument('--trfs', type=str, required=True, help='list of transformations')
518
+ parser.add_argument('--layout', type=int, nargs=2, default=(3,3), help='nb of rows,cols')
519
+ args = parser.parse_args()
520
+
521
+ img = dict(img=pil_loader(args.img))
522
+
523
+ trfs = instanciate_transforms(args.trfs)
524
+
525
+ pl.subplots_adjust(0,0,1,1)
526
+ nr,nc = args.layout
527
+
528
+ while True:
529
+ t0 = now()
530
+ imgs2 = [trfs(img) for _ in range(nr*nc)]
531
+
532
+ for j in range(nr):
533
+ for i in range(nc):
534
+ pl.subplot(nr,nc,i+j*nc+1)
535
+ img2 = img if i==j==0 else imgs2.pop() #trfs(img)
536
+ img2 = img2['img']
537
+ pl.imshow(img2)
538
+ pl.xlabel("%d x %d" % img2.size)
539
+ print(f'Took {now() - t0:.2f} seconds')
540
+ pl.show()
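
A minimal sketch of how the transform specification strings used throughout the datasets are interpreted (illustrative; the blank placeholder image and sizes are arbitrary): `instanciate_transforms` evaluates the comma-separated list into a `Compose`, and when the input is a dict the geometric transforms also accumulate the corresponding homography.

    import numpy as np
    from PIL import Image
    from datasets.transforms import instanciate_transforms

    trfs = instanciate_transforms("RandomRotation(10), RandomCrop(224)")

    inp = dict(img=Image.new('RGB', (320, 240)), homography=np.eye(3, dtype=np.float32))
    out = trfs(inp)
    print(out['img'].size)       # (224, 224)
    print(out['homography'])     # 3x3 matrix mapping input pixel coords to output coords
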
datasets/transforms_tools.py ADDED
@@ -0,0 +1,71 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import numpy as np
7
+ from PIL import Image, ImageOps, ImageEnhance
8
+
9
+
10
+ def grab( data, *fields ):
11
+ ''' Called to extract fields from a dictionary
12
+ '''
13
+ if isinstance(data, dict):
14
+ res = []
15
+ for f in fields:
16
+ res.append( data[f] )
17
+ return res[0] if len(fields) == 1 else tuple(res)
18
+
19
+ else: # or it must be the img directly
20
+ assert fields == ('img',) and isinstance(data, (np.ndarray, Image.Image)), \
21
+ f"data should be an image, not {type(data)}!"
22
+ return data
23
+
24
+
25
+ def update( data, **fields):
26
+ ''' Called to update the img_and_label
27
+ '''
28
+ if isinstance( data, dict):
29
+ if 'homography' in fields and 'homography' in data:
30
+ data['homography'] = fields.pop('homography') @ data['homography']
31
+ data.update(fields)
32
+ if 'img' in fields:
33
+ data['imsize'] = data['img'].size
34
+ return data
35
+
36
+ else: # or it must be the img directly
37
+ return fields['img']
38
+
39
+
40
+ def rand_log_uniform(rng, a, b):
41
+ return np.exp(rng.uniform(np.log(a),np.log(b)))
42
+
43
+
44
+ def translate(tx, ty):
45
+ return np.float32(((1,0,tx),(0,1,ty,),(0,0,1)))
46
+
47
+ def rotate(angle):
48
+ return np.float32(((np.cos(angle),-np.sin(angle),0),(np.sin(angle),np.cos(angle),0),(0,0,1)))
49
+
50
+
51
+ def is_pil_image(img):
52
+ return isinstance(img, Image.Image)
53
+
54
+
55
+ def homography_from_4pts(pts_cur, pts_new):
56
+ "pts_cur and pts_new = 4x2 point array, in [(x,y),...] format"
57
+ matrix = []
58
+ for p1, p2 in zip(pts_new, pts_cur):
59
+ matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
60
+ matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])
61
+ A = np.matrix(matrix, dtype=np.float64)
62
+ B = np.array(pts_cur).reshape(8)
63
+
64
+ homography = np.dot(np.linalg.pinv(A), B)
65
+ homography = tuple(np.array(homography).reshape(8))
66
+ #print(homography)
67
+ return homography
68
+
69
+
70
+
71
+
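
A quick numeric check of `homography_from_4pts` (illustrative, with made-up points): the 8 returned coefficients, with a 1 appended and reshaped to 3x3, map points of `pts_new` back onto `pts_cur` in homogeneous coordinates, which is the convention PIL's `Image.PERSPECTIVE` expects.

    import numpy as np
    from datasets.transforms_tools import homography_from_4pts

    pts_cur = [(0, 0), (100, 0), (100, 100), (0, 100)]    # a square...
    pts_new = [(-10, 0), (110, 0), (100, 100), (0, 100)]  # ...and a skewed version of it

    coefs = homography_from_4pts(pts_cur, pts_new)        # 8 PIL PERSPECTIVE coefficients
    H = np.float32(coefs + (1,)).reshape(3, 3)            # maps pts_new -> pts_cur

    x, y = pts_new[0]
    u = H @ np.float32([x, y, 1])
    print(u[:2] / u[2])                                   # ~ (0, 0), i.e. pts_cur[0]
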
datasets/utils.py ADDED
@@ -0,0 +1,104 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import numpy as np
7
+ import torch
8
+
9
+
10
+ class DatasetWithRng:
11
+ """ Make sure that RNG is distributed properly when torch.dataloader() is used
12
+ """
13
+
14
+ def __init__(self, seed=None):
15
+ self.seed = seed
16
+ self.rng = np.random.default_rng(seed)
17
+ self._rng_children = set()
18
+
19
+ def with_same_rng(self, dataset=None):
20
+ if dataset is not None:
21
+ assert isinstance(dataset, DatasetWithRng) and hasattr(dataset, 'rng'), bb()
22
+ self._rng_children.add( dataset )
23
+
24
+ # update all registered children
25
+ for db in self._rng_children:
26
+ db.rng = self.rng
27
+ db.with_same_rng() # recursive call
28
+ return dataset
29
+
30
+ def init_worker(self, tid):
31
+ if self.seed is None:
32
+ self.rng = np.random.default_rng()
33
+ else:
34
+ self.rng = np.random.default_rng(self.seed + tid)
35
+
36
+
37
+ class WorkerWithRngInit:
38
+ " Dataset inherits from datasets.DatasetWithRng() and has an init_worker() function "
39
+ def __call__(self, tid):
40
+ torch.utils.data.get_worker_info().dataset.init_worker(tid)
41
+
42
+
43
+ def corres_from_homography(homography, W, H, grid=64):
44
+ s = max(1, min(W, H) // grid) # at least `grid` points in smallest dim
45
+ sx, sy = [slice(s//2, l, s) for l in (W, H)]
46
+ grid1 = np.mgrid[sy, sx][::-1].reshape(2,-1).T # (x1,y1) grid
47
+
48
+ grid2 = applyh(homography, grid1)
49
+ scale = np.sqrt(np.abs(np.linalg.det(jacobianh(homography, grid1).T)))
50
+
51
+ corres = np.c_[grid1, grid2, np.ones_like(scale), np.zeros_like(scale), scale]
52
+ return corres
53
+
54
+
55
+ def invh( H ):
56
+ return np.linalg.inv(H)
57
+
58
+
59
+ def applyh(H, p, ncol=2, norm=True):
60
+ """ Apply the homography to a list of 2d points in homogeneous coordinates.
61
+
62
+ H: Homography (...x3x3 matrix/tensor)
63
+ p: numpy/torch/tuple of coordinates. Shape must be (...,2) or (...,3)
64
+
65
+ Returns an array of projected 2d points.
66
+ """
67
+ if isinstance(H, np.ndarray):
68
+ p = np.asarray(p)
69
+ elif isinstance(H, torch.Tensor):
70
+ p = torch.as_tensor(p, dtype=H.dtype)
71
+
72
+ if p.shape[-1]+1 == H.shape[-1]:
73
+ H = H.swapaxes(-1,-2) # transpose H
74
+ p = p @ H[...,:-1,:] + H[...,-1:,:]
75
+ else:
76
+ p = H @ p.T
77
+ if p.ndim >= 2: p = p.swapaxes(-1,-2)
78
+
79
+ if norm:
80
+ p /= p[...,-1:]
81
+ return p[...,:ncol]
82
+
83
+
84
+ def jacobianh(H, p):
85
+ """ H is an homography that maps: f_H(x,y) --> (f_1, f_2)
86
+ So the Jacobian J_H evaluated at p=(x,y) is a 2x2 matrix
87
+ Output shape = (2, 2, N) = (f_, xy, N)
88
+
89
+ Example of derivative:
90
+ numx a*X + b*Y + c*Z
91
+ since x = ----- = ---------------
92
+ denom u*X + v*Y + w*Z
93
+
94
+ numx' * denom - denom' * numx a*denom - u*numx
95
+ dx/dX = ----------------------------- = ----------------
96
+ denom**2 denom**2
97
+ """
98
+ (a, b, c), (d, e, f), (u, v, w) = H
99
+ numx, numy, denom = applyh(H, p, ncol=3, norm=False).T
100
+
101
+ # column x column x
102
+ J = np.float32(((a*denom - u*numx, b*denom - v*numx), # row f_1
103
+ (d*denom - u*numy, e*denom - v*numy))) # row f_2
104
+ return J / np.where(denom, denom*denom, np.nan)
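
A small self-check of `jacobianh` against finite differences (illustrative values, not part of the commit): for each point, the 2x2 analytic Jacobian of the homography should match the numerical derivative of `applyh`.

    import numpy as np
    from datasets.utils import applyh, jacobianh

    H = np.float32([[1.0, 0.1, 5.0],
                    [0.05, 0.9, -3.0],
                    [1e-4, 2e-4, 1.0]])
    p = np.float32([[10.0, 20.0]])

    J = jacobianh(H, p)[..., 0]        # analytic 2x2 Jacobian (rows: f1, f2; cols: x, y)

    eps = 1e-3
    fd = np.stack([(applyh(H, p + eps * np.eye(2, dtype=np.float32)[j]) - applyh(H, p)) / eps
                   for j in range(2)], axis=-1)[0]
    print(J)
    print(fd)                          # should agree up to ~1e-3
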
datasets/web_images.py ADDED
@@ -0,0 +1,50 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import os, os.path as osp
7
+
8
+ from tqdm import trange
9
+ from .image_set import ImageSet, verify_img
10
+
11
+
12
+ class RandomWebImages (ImageSet):
13
+ """ 1 million distractors from Oxford and Paris Revisited
14
+ see http://ptak.felk.cvut.cz/revisitop/revisitop1m/
15
+ """
16
+ def __init__(self, start=0, end=52, root="datasets/revisitop1m"):
17
+ bar = None
18
+ imgs = []
19
+ for i in range(start, end):
20
+ try:
21
+ # read cached list
22
+ img_list_path = osp.join(root, "image_list_%d.txt"%i)
23
+ cached_imgs = [e.strip() for e in open(img_list_path)]
24
+ assert cached_imgs, f"Cache '{img_list_path}' is empty!"
25
+ imgs += cached_imgs
26
+
27
+ except IOError:
28
+ if bar is None:
29
+ bar = trange(start, 4*end, desc='Caching')
30
+ bar.update(4*i)
31
+
32
+ # create it
33
+ imgs_i = []
34
+ for d in range(i*4,(i+1)*4): # 4096 folders in total, on average 256 each
35
+ key = hex(d)[2:].zfill(3)
36
+ folder = osp.join(root, key)
37
+ if not osp.isdir(folder): continue
38
+ imgs_i += [f for f in os.listdir(folder) if verify_img(osp.join(folder, f), exts='.jpg')]
39
+ bar.update(1)
40
+ assert imgs_i, f"No images found in {folder}/"
41
+ open(img_list_path,'w').write('\n'.join(imgs_i))
42
+ imgs += imgs_i
43
+
44
+ if bar: bar.update(bar.total - bar.n)
45
+ super().__init__(root, imgs)
46
+
47
+ def get_image_path(self, idx):
48
+ key = self.imgs[idx]
49
+ return osp.join(self.root, key[:3], key)
50
+
demo_warping.py ADDED
@@ -0,0 +1,102 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import os, os.path as osp
7
+
8
+ from PIL import Image
9
+ import numpy as np
10
+ from tools.viz import pl, noticks
11
+
12
+ """ This script will warp (deform) img2 so that it fits img1
13
+
14
+ >> In case of memory failure (not enough GPU memory):
15
+ try adding '--resize 400 300' (or larger values if possible) to the _exec(...) command below.
16
+ """
17
+
18
+ def parse_args():
19
+ import argparse
20
+ parser = argparse.ArgumentParser('PUMP image-warping demo script')
21
+
22
+ parser.add_argument('--img1', default='datasets/demo_warp/mountains_src.jpg')
23
+ parser.add_argument('--img2', default='datasets/demo_warp/mountains_tgt.jpg')
24
+ parser.add_argument('--output', default='results/demo_warp')
25
+
26
+ parser.add_argument('--just-print', action='store_true', help='just print commands')
27
+ return parser.parse_args()
28
+
29
+
30
+ def main( args ):
31
+ run_pump(args) and run_demo_warp(args)
32
+
33
+
34
+ def run_pump(args):
35
+ output_path = osp.join(args.output, args.img1, args.img2+'.corres')
36
+ if osp.isfile(output_path): return True
37
+
38
+ return _exec(f'''python test_singlescale_recursive.py
39
+ --img1 {args.img1}
40
+ --img2 {args.img2}
41
+ --post-filter densify=True
42
+ --output {output_path}''')
43
+
44
+
45
+ def run_demo_warp(args):
46
+ corres_path = osp.join(args.output, args.img1, args.img2+'.corres')
47
+ corres = np.load(corres_path)['corres']
48
+
49
+ img1 = Image.open(args.img1).convert('RGB')
50
+ img2 = Image.open(args.img2).convert('RGB')
51
+
52
+ W, H = img1.size
53
+ warped_img2 = warp_img(np.asarray(img2), corres[:,2:4].reshape(H,W,2))
54
+
55
+ pl.figure('Warping demo')
56
+
57
+ noticks(pl.subplot(211))
58
+ pl.imshow( img2 )
59
+ pl.title('Source image')
60
+
61
+ noticks(pl.subplot(223))
62
+ pl.imshow( img1 )
63
+ pl.title('Target image')
64
+
65
+ noticks(pl.subplot(224))
66
+ pl.imshow( warped_img2 )
67
+ pl.title('Source image warped to match target')
68
+
69
+ pl.tight_layout()
70
+ pl.show(block=True)
71
+
72
+
73
+ def warp_img( img, absolute_flow ):
74
+ H1, W1, TWO = absolute_flow.shape
75
+ H2, W2, THREE = img.shape
76
+ assert TWO == 2 and THREE == 3
77
+
78
+ warp = absolute_flow.round().astype(int)
79
+ invalid = (warp[:,:,0]<0) | (warp[:,:,0]>=W2) | (warp[:,:,1]<0) | (warp[:,:,1]>=H2)
80
+
81
+ warp[:,:,0] = warp[:,:,0].clip(min=0, max=W2-1)
82
+ warp[:,:,1] = warp[:,:,1].clip(min=0, max=H2-1)
83
+ warp = warp[:,:,0] + W2*warp[:,:,1]
84
+
85
+ warped_img = np.asarray(img).reshape(-1,3)[warp].reshape(H1,W1,3)
86
+ return warped_img
87
+
88
+
89
+ def _exec(cmd):
90
+ # strip & remove \n
91
+ cmd = ' '.join(cmd.split())
92
+
93
+ if args.just_print:
94
+ print(cmd)
95
+ return False
96
+ else:
97
+ return os.WEXITSTATUS(os.system(cmd)) == 0
98
+
99
+
100
+ if __name__ == '__main__':
101
+ args = parse_args()
102
+ main( args )
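A short sketch of re-using warp_img() on its own once run_pump() has produced the densified correspondence file; the .corres path below is the one the script builds from its default arguments, and is only illustrative:

import numpy as np
from PIL import Image
from demo_warping import warp_img

img1 = Image.open('datasets/demo_warp/mountains_src.jpg').convert('RGB')   # reference frame (--img1)
img2 = Image.open('datasets/demo_warp/mountains_tgt.jpg').convert('RGB')   # image to be warped (--img2)
# path built by run_pump(): osp.join(args.output, args.img1, args.img2 + '.corres')
corres_path = 'results/demo_warp/datasets/demo_warp/mountains_src.jpg/datasets/demo_warp/mountains_tgt.jpg.corres'
corres = np.load(corres_path)['corres']

W, H = img1.size
warped = warp_img(np.asarray(img2), corres[:, 2:4].reshape(H, W, 2))        # (H, W, 3) uint8 array
Image.fromarray(warped).save('results/demo_warp/warped.jpg')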
download_training_data.sh ADDED
@@ -0,0 +1,53 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ CODE_ROOT=`pwd`
6
+ if [ ! -e datasets ]; then
7
+ echo "Error: missing datasets/ folder"
8
+ echo "First, create a folder that can host (at least) 15 GB of data."
9
+ echo "Then, create a soft-link named 'data' that points to it."
10
+ exit -1
11
+ fi
12
+
13
+ # download some web images from the revisitop1m dataset
14
+ WEB_ROOT=datasets/revisitop1m
15
+ mkdir -p $WEB_ROOT
16
+ cd $WEB_ROOT
17
+ if [ ! -e 0d3 ]; then
18
+ for i in {1..5}; do
19
+ echo "Installing the web images dataset ($i/5)..."
20
+ if [ ! -f revisitop1m.$i.tar.gz ]; then
21
+ wget http://ptak.felk.cvut.cz/revisitop/revisitop1m/jpg/revisitop1m.$i.tar.gz
22
+ fi
23
+ tar -xzvf revisitop1m.$i.tar.gz
24
+ rm -f revisitop1m.$i.tar.gz
25
+ done
26
+ fi
27
+ cd $CODE_ROOT
28
+
29
+ # download SfM120k pairs
30
+ SFM_ROOT=datasets/sfm120k
31
+ mkdir -p $SFM_ROOT
32
+ cd $SFM_ROOT
33
+ if [ ! -e "ims" ]; then
34
+ echo "Downloading the SfM120k dataset..."
35
+ fname=ims.tar.gz
36
+ if [ ! -f $fname ]; then
37
+ wget http://cmp.felk.cvut.cz/cnnimageretrieval/data/train/ims/ims.tar.gz
38
+ fi
39
+ mkdir -p ims && tar -xzvf $fname -C ims
40
+ rm -f $fname
41
+ fi
42
+ if [ ! -e "corres" ]; then
43
+ echo "Installing the SfM120k dataset..."
44
+ fname=corres.tar.gz
45
+ if [ ! -f $fname ]; then
46
+ wget https://download.europe.naverlabs.com/corres.tar.gz
47
+ fi
48
+ tar -xzvf $fname
49
+ rm -f $fname
50
+ fi
51
+ cd $CODE_ROOT
52
+
53
+ echo "Done!"
imgs/demo_warp.jpg ADDED
imgs/overview.png ADDED
imgs/teaser_paper.jpg ADDED
imgs/test.png ADDED
post_filter.py ADDED
@@ -0,0 +1,235 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ import pdb, sys, os
6
+ import argparse
7
+ import numpy as np
8
+ from scipy.sparse import coo_matrix, csr_matrix, triu, csgraph
9
+
10
+ import core.functional as myF
11
+ from tools.common import image, image_with_trf
12
+ from tools.viz import dbgfig, show_correspondences
13
+
14
+
15
+ def arg_parser():
16
+ parser = argparse.ArgumentParser("Post-filtering of Deep matching correspondences")
17
+
18
+ parser.add_argument("--img1", required=True, help="path to first image")
19
+ parser.add_argument("--img2", required=True, help="path to second image")
20
+ parser.add_argument("--resize", default=0, type=int, help="prior image downsize (0 if recursive)")
21
+ parser.add_argument("--corres", required=True, help="input path")
22
+ parser.add_argument("--output", default="", help="filtered corres output")
23
+
24
+ parser.add_argument("--locality", type=float, default=2, help="tolerance to deformation")
25
+ parser.add_argument("--min-cc-size", type=int, default=50, help="min connex-component size")
26
+ parser.add_argument("--densify", default='no', choices=['no','full','cc','convex'], help="output pixel-dense corres field")
27
+ parser.add_argument("--dense-side", default='left', choices=['left','right'], help="img to densify")
28
+
29
+ parser.add_argument("--verbose", "-v", type=int, default=0, help="verbosity level")
30
+ parser.add_argument("--dbg", type=str, nargs='+', default=(), help="debug options")
31
+ return parser
32
+
33
+
34
+ def main(args):
35
+ import test_singlescale as pump
36
+ corres = np.load(args.corres)['corres']
37
+ imgs = tuple(map(image, pump.Main.load_images(args)))
38
+
39
+ if dbgfig('raw',args.dbg):
40
+ show_correspondences(*imgs, corres)
41
+
42
+ corres = filter_corres( *imgs, corres,
43
+ locality=args.locality, min_cc_size=args.min_cc_size,
44
+ densify=args.densify, dense_side=args.dense_side,
45
+ verbose=args.verbose, dbg=args.dbg)
46
+
47
+ if dbgfig('viz',args.dbg):
48
+ show_correspondences(*imgs, corres)
49
+
50
+ return pump.save_output( args, corres )
51
+
52
+
53
+ def filter_corres( img0, img1, corres,
54
+ locality = None, # graph edge locality
55
+ min_cc_size = None, # min CC size
56
+ densify = None,
57
+ dense_side = None,
58
+ verbose = 0, dbg=()):
59
+
60
+ if None in (locality, min_cc_size, densify, dense_side):
61
+ default_params = arg_parser()
62
+ locality = locality or default_params.get_default('locality')
63
+ min_cc_size = min_cc_size or default_params.get_default('min_cc_size')
64
+ densify = densify or default_params.get_default('densify')
65
+ dense_side = dense_side or default_params.get_default('dense_side')
66
+
67
+ img0, trf0 = img0 if isinstance(img0,tuple) else (img0, np.eye(3))
68
+ img1, trf1 = img1 if isinstance(img1,tuple) else (img1, np.eye(3))
69
+ assert isinstance(img0, np.ndarray) and isinstance(img1, np.ndarray)
70
+
71
+ corres = myF.affmul((np.linalg.inv(trf0),np.linalg.inv(trf1)), corres)
72
+ n_corres = len(corres)
73
+ if verbose: print(f'>> input: {len(corres)} correspondences')
74
+
75
+ graph = compute_graph(corres, max_dis=locality*4)
76
+ if verbose: print(f'>> {locality=}: {graph.nnz} edges in graph')
77
+
78
+ cc_sizes = measure_connected_components(graph)
79
+ corres[:,4] += np.log2(cc_sizes)
80
+ corres = corres[cc_sizes > min_cc_size]
81
+ if verbose: print(f'>> {min_cc_size=}: remaining {len(corres)} correspondences')
82
+
83
+ final = myF.affmul((trf0,trf1), corres)
84
+
85
+ if densify != 'no':
86
+ # densify correspondences
87
+ if dense_side == 'right': # temporary swap
88
+ final = final[:,[2,3,0,1]]
89
+ H = round(img1.shape[0] / trf1[1,1])
90
+ W = round(img1.shape[1] / trf1[0,0])
91
+ else:
92
+ H = round(img0.shape[0] / trf0[1,1])
93
+ W = round(img0.shape[1] / trf0[0,0])
94
+
95
+ if densify == 'cc':
96
+ assert False, 'todo'
97
+ elif densify in (True, 'full', 'convex'):
98
+ # recover true image0's shape
99
+ final = densify_corres( final, (H, W), full=(densify!='convex') )
100
+ else:
101
+ raise ValueError(f'Bad mode for {densify=}')
102
+
103
+ if dense_side == 'right': # undo temporary swap
104
+ final = final[:,[2,3,0,1]]
105
+
106
+ return final
107
+
108
+
109
+ def compute_graph(corres, max_dis=10, min_ang=90):
110
+ """ 4D distances (corres can only be connected to same scale)
111
+ using sparse matrices for efficiency
112
+
113
+ step1: build horizontal and vertical binning, binsize = max_dis
114
+ add in each bin all neighbor bins
115
+ step2: for each corres, we can intersect 2 bins to get a short list of candidates
116
+ step3: verify euclidean distance < maxdis (optional?)
117
+ """
118
+ def bin_positions(pos):
119
+ # every corres goes into a single bin
120
+ bin_indices = np.int32(pos.clip(min=0) // max_dis) + 1
121
+ cols = np.arange(len(pos))
122
+
123
+ # add the cell before and the cell after, to handle border effects
124
+ res = csr_matrix((np.ones(len(bin_indices)*3,dtype=np.float32),
125
+ (np.r_[bin_indices-1, bin_indices, bin_indices+1], np.r_[cols,cols,cols])),
126
+ shape=(bin_indices.max()+2 if bin_indices.size else 1, len(pos)))
127
+
128
+ return res, bin_indices
129
+
130
+ # 1-hot matrices of shape = nbins x n_corres
131
+ x1_bins = bin_positions(corres[:,0])
132
+ y1_bins = bin_positions(corres[:,1])
133
+ x2_bins = bin_positions(corres[:,2])
134
+ y2_bins = bin_positions(corres[:,3])
135
+
136
+ def row_indices(ngh):
137
+ res = np.bincount(ngh.indptr[1:-1], minlength=ngh.indptr[-1])[:-1]
138
+ return res.cumsum()
139
+
140
+ def compute_dist( ngh, pts, scale=None ):
141
+ # pos from the second point
142
+ x_pos = pts[ngh.indices,0]
143
+ y_pos = pts[ngh.indices,1]
144
+
145
+ # subtract pos from the 1st point
146
+ rows = row_indices(ngh)
147
+ x_pos -= pts[rows, 0]
148
+ y_pos -= pts[rows, 1]
149
+ dis = np.sqrt(np.square(x_pos) + np.square(y_pos))
150
+ if scale is not None:
151
+ # there is a scale for each of the 2 pts, we lean toward the worse one
152
+ dis *= (scale[rows] + scale[ngh.indices]) / 2 # so we use arithmetic instead of geometric mean
153
+
154
+ return normed(np.c_[x_pos, y_pos]), dis
155
+
156
+ def Rot( ngh, degrees ):
157
+ rows = row_indices(ngh)
158
+ rad = degrees * np.pi / 180
159
+ rad = (rad[rows] + rad[ngh.indices]) / 2 # average angle between 2 corres
160
+ cos, sin = np.cos(rad), np.sin(rad)
161
+ return np.float32(((cos, -sin), (sin,cos))).transpose(2,0,1)
162
+
163
+ def match(xbins, ybins, pt1, pt2, way):
164
+ xb, ixb = xbins
165
+ yb, iyb = ybins
166
+
167
+ # gets for each corres a list of potential matches
168
+ ngh = xb[ixb].multiply( yb[iyb] ) # shape = n_corres x n_corres
169
+ ngh = triu(ngh, k=1).tocsr() # remove mirrored matches
170
+ # ngh = matches of matches, shape = n_corres x n_corres
171
+
172
+ # verify locality and flow
173
+ vec1, d1 = compute_dist(ngh, pt1) # for each match, distance and orientation in img1
174
+ # assert d1.max()**0.5 < 2*max_dis*1.415, 'cannot be larger than 2 cells in diagonals, or there is a bug'+bb()
175
+ scale, rot = myF.decode_scale_rot(corres[:,5])
176
+ vec2, d2 = compute_dist(ngh, pt2, scale=scale**(-way))
177
+ ang = np.einsum('ik,ik->i', (vec1[:,None] @ Rot(ngh,way*rot))[:,0], vec2)
178
+
179
+ valid = (d1 <= max_dis) & (d2 <= max_dis) & (ang >= np.cos(min_ang*np.pi/180))
180
+ res = csr_matrix((valid, ngh.indices, ngh.indptr), shape=ngh.shape)
181
+ res.eliminate_zeros()
182
+ return res
183
+
184
+ # find all neighbors within each xy bin
185
+ ngh1 = match(x1_bins, y1_bins, corres[:,0:2], corres[:,2:4], way=+1)
186
+ ngh2 = match(x2_bins, y2_bins, corres[:,2:4], corres[:,0:2], way=-1).T
187
+
188
+ return ngh1 + ngh2 # union
189
+
190
+
191
+ def measure_connected_components(graph, dbg=()):
192
+ # compute connected components
193
+ nc, labels = csgraph.connected_components(graph, directed=False)
194
+
195
+ # count the size of each connected component
196
+ count = np.bincount(labels)
197
+
198
+ return count[labels]
199
+
200
+ def normed( mat ):
201
+ return mat / np.linalg.norm(mat, axis=-1, keepdims=True).clip(min=1e-16)
202
+
203
+
204
+ def densify_corres( corres, shape, full=True ):
205
+ from scipy.interpolate import LinearNDInterpolator
206
+ from scipy.spatial import cKDTree as KDTree
207
+
208
+ assert len(corres) > 3, 'Not enough corres for densification'
209
+ H, W = shape
210
+
211
+ interp = LinearNDInterpolator(corres[:,0:2], corres[:,2:4])
212
+ X, Y = np.mgrid[0:H, 0:W][::-1] # H x W, H x W
213
+ p1 = np.c_[X.ravel(), Y.ravel()]
214
+ p2 = interp(X, Y) # H x W x 2
215
+
216
+ p2 = p2.reshape(-1,2)
217
+ invalid = np.isnan(p2).any(axis=1)
218
+
219
+ if full:
220
+ # interpolate pixels outside of the convex hull
221
+ badp = p1[invalid]
222
+ tree = KDTree(corres[:,0:2])
223
+ _, nn = tree.query(badp, 3) # find 3 closest neighbors
224
+ corflow = corres[:,2:4] - corres[:,0:2]
225
+ p2.reshape(-1,2)[invalid] = corflow[nn].mean(axis=1) + p1[invalid]
226
+ else:
227
+ # remove nans, i.e. remove points outside of convex hull
228
+ p1, p2 = p1[~invalid], p2[~invalid]
229
+
230
+ # return correspondence field
231
+ return np.c_[p1, p2]
232
+
233
+
234
+ if __name__ == '__main__':
235
+ main(arg_parser().parse_args())
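For completeness, filter_corres() can also be called directly from Python; a minimal sketch, where the .corres path is hypothetical and the zero images only serve as placeholders for the coordinate frames:

import numpy as np
from post_filter import filter_corres

img0 = np.zeros((480, 640, 3), np.uint8)            # placeholder first image (H, W, 3)
img1 = np.zeros((480, 640, 3), np.uint8)            # placeholder second image
corres = np.load('results/pair.corres')['corres']   # hypothetical path, rows = (x1, y1, x2, y2, score, code)

filtered = filter_corres(img0, img1, corres, locality=2, min_cc_size=50,
                         densify='no', dense_side='left', verbose=1)
print(f'{len(corres)} -> {len(filtered)} correspondences')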
requirements.txt ADDED
@@ -0,0 +1,5 @@
1
+ matplotlib
2
+ numpy
3
+ scipy
4
+ torch==1.11.0
5
+ torchvision==0.12.0
run_ETH3D.py ADDED
@@ -0,0 +1,118 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import os, os.path as osp
7
+ from tqdm import tqdm
8
+ import numpy as np
9
+
10
+ SEQUENCES = [ 'lakeside', 'sand_box', 'storage_room', 'storage_room_2', 'tunnel',
11
+ 'delivery_area', 'electro', 'forest', 'playground', 'terrains']
12
+
13
+ RATES = [3, 5, 7, 9, 11, 13, 15]
14
+
15
+ def parse_args():
16
+ import argparse
17
+ parser = argparse.ArgumentParser('PUMP evaluation script for the ETH3D dataset')
18
+
19
+ parser.add_argument('--root', default='datasets/eth3d')
20
+ parser.add_argument('--output', default='results/eth3d')
21
+
22
+ parser.add_argument('--just-print', action='store_true', help='just print commands')
23
+ return parser.parse_args()
24
+
25
+
26
+ def main( args ):
27
+ run_pump(args) and run_eval(args)
28
+
29
+
30
+ def run_pump(args):
31
+ done = True
32
+ for img1, img2 in tqdm(list_eth3d_pairs()):
33
+ output_path = osp.join(args.output, img1, img2+'.corres')
34
+ if osp.isfile(output_path): continue
35
+
36
+ done = False
37
+ _exec(f'''python test_multiscale_recursive.py
38
+ --img1 {osp.join(args.root,img1)}
39
+ --img2 {osp.join(args.root,img2)}
40
+ --max-scale 1.5
41
+ --desc PUMP
42
+ --post-filter "densify=True,dense_side='right'"
43
+ --output {output_path}''')
44
+
45
+ return done
46
+
47
+
48
+ def run_eval( args ):
49
+ for rate in RATES:
50
+ mean_aepe_per_rate = 0
51
+
52
+ for seq in SEQUENCES:
53
+ pairs = np.load(osp.join(args.root, 'info_ETH3D_files', f'{seq}_every_5_rate_of_{rate}'), allow_pickle=True)
54
+
55
+ mean_aepe_per_seq = 0
56
+ for pair in pairs:
57
+ img1, img2 = pair['source_image'], pair['target_image']
58
+ Ys, Xs, Yt, Xt = [np.float32(pair[k]) for k in 'Ys Xs Yt Xt'.split()]
59
+
60
+ corres_path = osp.join(args.output, img1, img2+'.corres')
61
+ corres = np.load(corres_path, allow_pickle=True)['corres']
62
+
63
+ # extract estimated and target flow
64
+ W, H = np.int32(corres[-1, 2:4] + 1)
65
+ flow = (corres[:,0:2] - corres[:,2:4]).reshape(H, W, 2)
66
+ iYt, iXt = np.int32(np.round(Yt)), np.int32(np.round(Xt))
67
+ if 'correct way':
68
+ gt_targets = np.c_[Xs - Xt, Ys - Yt]
69
+ est_targets = flow[iYt, iXt]
70
+ elif 'GLU-Net way (somewhat inaccurate because of overlapping points in the mask)':
71
+ mask = np.zeros((H,W), dtype=bool)
72
+ mask[iYt, iXt] = True
73
+ gt_flow = np.full((H,W,2), np.nan, dtype=np.float32)
74
+ gt_flow[iYt, iXt, 0] = Xs - Xt
75
+ gt_flow[iYt, iXt, 1] = Ys - Yt
76
+ gt_targets = gt_flow[mask]
77
+ est_targets = flow[mask]
78
+
79
+ # compute end-point error
80
+ aepe = np.linalg.norm(est_targets - gt_targets, axis=-1).mean()
81
+ mean_aepe_per_seq += aepe
82
+
83
+ mean_aepe_per_seq /= len(pairs)
84
+ mean_aepe_per_rate += mean_aepe_per_seq
85
+ print(f'mean AEPE for {rate=} {seq=}:', mean_aepe_per_seq)
86
+
87
+ print(f'>> mean AEPE for {rate=}:', mean_aepe_per_rate / len(SEQUENCES))
88
+
89
+
90
+ def list_eth3d_pairs():
91
+ path = osp.join(args.root, 'info_ETH3D_files', 'list_pairs.txt')
92
+ try:
93
+ lines = open(path).read().splitlines()
94
+ except OSError:
95
+ lines = []
96
+ for seq in SEQUENCES:
97
+ for rate in RATES:
98
+ pairs = np.load(osp.join(args.root, 'info_ETH3D_files', f'{seq}_every_5_rate_of_{rate}'), allow_pickle=True)
99
+ for pair in pairs:
100
+ lines.append(pair['source_image'] + ' ' + pair['target_image'])
101
+ open(path, 'w').write('\n'.join(lines))
102
+
103
+ pairs = [line.split() for line in lines if line[0] != '#']
104
+ return pairs
105
+
106
+
107
+ def _exec(cmd):
108
+ # strip & remove \n
109
+ cmd = ' '.join(cmd.split())
110
+ if args.just_print:
111
+ print(cmd)
112
+ else:
113
+ os.system(cmd)
114
+
115
+
116
+ if __name__ == '__main__':
117
+ args = parse_args()
118
+ main( args )
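As a sanity check of the metric above, the AEPE of the 'correct way' branch reduces to a mean Euclidean distance between estimated and ground-truth flow vectors; a toy example with made-up values:

import numpy as np

gt_targets  = np.float32([[0, 0], [0, 0]])   # ground-truth flow (Xs - Xt, Ys - Yt)
est_targets = np.float32([[1, 0], [0, 2]])   # estimated flow read from the corres field
aepe = np.linalg.norm(est_targets - gt_targets, axis=-1).mean()
print(aepe)   # (1.0 + 2.0) / 2 = 1.5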
test_multiscale.py ADDED
@@ -0,0 +1,262 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ from itertools import starmap
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+
12
+ import test_singlescale as tss
13
+ from core import functional as myF
14
+ from tools.common import todevice, cpu
15
+ from tools.viz import dbgfig, show_correspondences
16
+
17
+
18
+ def arg_parser():
19
+ parser = tss.arg_parser()
20
+ parser.set_defaults(levels = 0, verbose=0)
21
+
22
+ parser.add_argument('--min-scale', type=float, default=None, help='min scale ratio')
23
+ parser.add_argument('--max-scale', type=float, default=4, help='max scale ratio')
24
+
25
+ parser.add_argument('--min-rot', type=float, default=None, help='min rotation (in degrees) in [-180,180]')
26
+ parser.add_argument('--max-rot', type=float, default=0, help='max rotation (in degrees) in [0,180]')
27
+ parser.add_argument('--crop-rot', action='store_true', help='crop rotated image to prevent memory blow-up')
28
+ parser.add_argument('--rot-step', type=int, default=45, help='rotation step (in degrees)')
29
+
30
+ parser.add_argument('--no-swap', type=int, default=1, nargs='?', const=0, choices=[1,0,-1], help='if 0, img1 will have keypoints on a grid')
31
+ parser.add_argument('--same-levels', action='store_true', help='use the same number of pyramid levels for all scales')
32
+
33
+ parser.add_argument('--merge', choices='torch cpu cuda'.split(), default='cpu')
34
+ return parser
35
+
36
+
37
+ class MultiScalePUMP (nn.Module):
38
+ """ DeepMatching that loops over all possible {scale x rotation} combinations.
39
+ """
40
+ def __init__(self, matcher,
41
+ min_scale=1,
42
+ max_scale=1,
43
+ max_rot=0,
44
+ min_rot=0,
45
+ rot_step=45,
46
+ swap_mode=1,
47
+ same_levels=False,
48
+ crop_rot=False):
49
+ super().__init__()
50
+ min_scale = min_scale or 1/max_scale
51
+ min_rot = min_rot or -max_rot
52
+ assert 0.1 <= min_scale <= max_scale <= 10
53
+ assert -180 <= min_rot <= max_rot <= 180
54
+ self.matcher = matcher
55
+ self.matcher.crop_rot = crop_rot
56
+
57
+ self.min_sc = min_scale
58
+ self.max_sc = max_scale
59
+ self.min_rot = min_rot
60
+ self.max_rot = max_rot
61
+ self.rot_step = rot_step
62
+ self.swap_mode = swap_mode
63
+ self.merge_device = None
64
+ self.same_levels = same_levels
65
+
66
+ @torch.no_grad()
67
+ def forward(self, img1, img2, dbg=()):
68
+ img1, sca1 = img1 if isinstance(img1, tuple) else (img1, torch.eye(3, device=img1.device))
69
+ img2, sca2 = img2 if isinstance(img2, tuple) else (img2, torch.eye(3, device=img2.device))
70
+
71
+ # prepare correspondences accumulators
72
+ if self.same_levels: # limit number of levels
73
+ self.matcher.levels = self._find_max_levels(img1,img2)
74
+ elif self.matcher.levels == 0:
75
+ max_psize = int(min(np.mean(img1.shape[-2:]), np.mean(img2.shape[-2:])))
76
+ self.matcher.levels = int(np.log2(max_psize / self.matcher.pixel_desc.get_atomic_patch_size()))
77
+
78
+ all_corres = (self._make_accu(img1), self._make_accu(img2))
79
+
80
+ for scale, ang, code, swap, swapped, (scimg1, scimg2) in self._enum_scaled_pairs(img1, img2):
81
+ print(f"processing {scale=:g} x {ang=} {['','(swapped)'][swapped]} ({code=})...")
82
+
83
+ # compute correspondences with rotated+scaled image
84
+ corres, rots = self.process_one_scale(swapped, *[scimg1,scimg2], dbg=dbg)
85
+ if dbgfig('corres-ms', dbg): viz_correspondences(img1, img2, *corres, fig='last')
86
+
87
+ # merge correspondences in the reference frame
88
+ self.merge_corres( corres, rots, all_corres, code )
89
+
90
+ # final intersection
91
+ corres = self.reciprocal( *all_corres )
92
+ return myF.affmul(todevice((sca1,sca2),corres.device), corres) # rescaling to original image scale
93
+
94
+ def process_one_scale(self, swapped, *imgs, dbg=()):
95
+ return unswap(self.matcher(*imgs, ret='raw', dbg=dbg), swapped)
96
+
97
+ def _find_max_levels(self, img1, img2):
98
+ min_levels = self.matcher.levels or 999
99
+ for _, _, code, _, _, (img1, img2) in self._enum_scaled_pairs(img1, img2):
100
+ # first level when a parent doesn't have children: gap >= min(shape), with gap = 2**(level-2)
101
+ img1_levels = ceil(np.log2(min(img1[0].shape[-2:])) - 1)
102
+ # first level when img2's shape becomes smaller than self.min_shape, with shape = min(shape) / 2**level
103
+ img2_levels = ceil(np.log2(min(img2[0].shape[-2:]) / self.matcher.min_shape))
104
+ # print(f'predicted levels for {code=}:\timg1 --> {img1_levels},\timg2 --> {img2_levels} levels')
105
+ min_levels = min(min_levels, img1_levels, img2_levels)
106
+ return min_levels
107
+
108
+ def merge_corres(self, corres, rots, all_corres, code):
109
+ " rot : reference --> rotated "
110
+ self.merge_one_side( corres[0], slice(0,2), rots[0], all_corres[0], code )
111
+ self.merge_one_side( corres[1], slice(2,4), rots[1], all_corres[1], code )
112
+
113
+ def merge_one_side(self, corres, sel, trf, all_corres, code ):
114
+ pos, scores = corres
115
+ grid, accu = all_corres
116
+ accu = accu.view(-1, 6)
117
+
118
+ # compute 4-nn in transformed image for each grid point
119
+ best4 = torch.cdist(pos[:,sel].float(), grid).topk(4, dim=0, largest=False)
120
+ # best4.shape = (4, len(grid))
121
+
122
+ # update if score is better AND distance less than 2x best dist
123
+ scale = float(torch.sqrt(torch.det(trf))) # == scale (with scale >= 1)
124
+ dist_max = 8*scale - 1e-7 # 2x the distance between contiguous patches
125
+
126
+ close_enough = (best4.values <= 2*best4.values[0:1]) & (best4.values < dist_max)
127
+ neg_inf = torch.tensor(-np.inf, device=scores.device)
128
+ best_score = torch.where(close_enough, scores.ravel()[best4.indices], neg_inf).max(dim=0)
129
+ is_better = best_score.values > accu[:,4].ravel()
130
+
131
+ accu[is_better,0:4] = pos[best4.indices[best_score.indices,torch.arange(len(grid))][is_better]]
132
+ accu[is_better,4] = best_score.values[is_better]
133
+ accu[is_better,5] = code
134
+
135
+ def reciprocal(self, corres1, corres2 ):
136
+ grid1, corres1 = cpu(corres1)
137
+ grid2, corres2 = cpu(corres2)
138
+
139
+ (H1, W1), (H2, W2) = grid1[-1]+1, grid2[-1]+1
140
+ pos1 = corres1[:,:,0:4].view(-1,4)
141
+ pos2 = corres2[:,:,0:4].view(-1,4)
142
+
143
+ to_int = torch.tensor((W1*H2*W2, H2*W2, W2, 1), dtype=torch.float32)
144
+ inter1 = myF.intersection(pos1@to_int, pos2@to_int)
145
+ return corres1.view(-1,6)[inter1]
146
+
147
+ def _enum_scales(self):
148
+ for i in range(-100,101):
149
+ scale = 2**(i/2)
150
+ # if i != -2: continue
151
+ if self.min_sc <= scale <= self.max_sc:
152
+ yield i,scale
153
+
154
+ def _enum_rotations(self):
155
+ for i in range(-180//self.rot_step, 180//self.rot_step):
156
+ rot = i * self.rot_step
157
+ if self.min_rot <= rot <= self.max_rot:
158
+ yield i,-rot
159
+
160
+ def _enum_scaled_pairs(self, img1, img2):
161
+ for s, scale in self._enum_scales():
162
+ (i1,sca1), (i2,sca2) = starmap(downsample_img, [(img1, min(scale, 1)), (img2, min(1/scale, 1))])
163
+ # set bigger image as the first one
164
+ size1 = min(i1.shape[-2:])
165
+ size2 = min(i2.shape[-2:])
166
+ swapped = size1*self.swap_mode < size2*self.swap_mode
167
+ swap = (1 - 2*swapped) # swapped ==> swap = -1
168
+ if swapped:
169
+ (i1,sca1), (i2,sca2) = (i2,sca2), (i1,sca1)
170
+
171
+ for r, ang in self._enum_rotations():
172
+ code = myF.encode_scale_rot(scale, ang)
173
+ trf1 = (sca1, swap*ang) if ang != 0 else sca1
174
+ yield scale, ang, code, swap, swapped, ((i1,trf1), (i2,sca2))
175
+
176
+ def _make_accu(self, img):
177
+ C, H, W = img.shape
178
+ step = self.matcher.pixel_desc.get_atomic_patch_size() // 2
179
+ h = step//2 - 1
180
+ accu = img.new_zeros(((H+h)//step, (W+h)//step, 6), dtype=torch.float32, device=self.merge_device or img.device)
181
+ grid = step * myF.mgrid(accu[:,:,0], device=img.device) + (step//2)
182
+ return grid, accu
183
+
184
+
185
+ def downsample_img(img, scale=0):
186
+ assert scale <= 1
187
+ img, trf = img if isinstance(img, tuple) else (img, torch.eye(3, device=img.device))
188
+ if scale == 1: return img, trf
189
+
190
+ assert img.dtype == torch.uint8
191
+ trf = trf.clone() # dont modify inplace
192
+ trf[:2,:2] /= scale
193
+ while scale <= 0.5:
194
+ img = F.avg_pool2d(img[None].float(), 2, stride=2, count_include_pad=False)[0]
195
+ scale *= 2
196
+ if scale != 1:
197
+ img = F.interpolate(img[None].float(), scale_factor=scale, mode='bicubic', align_corners=False, recompute_scale_factor=False).clamp(min=0, max=255)[0]
198
+ return img.byte(), trf # scaled --> pxl
199
+
200
+
201
+ def ceil(i):
202
+ return int(np.ceil(i))
203
+
204
+ def unswap( corres, swapped ):
205
+ swap = -1 if swapped else 1
206
+ corres, rots = corres
207
+ corres = corres[::swap]
208
+ rots = rots[::swap]
209
+ if swapped:
210
+ for pos, _ in corres:
211
+ pos[:,0:4] = pos[:,[2,3,0,1]].clone()
212
+ return corres, rots
213
+
214
+
215
+ def demultiplex_img_trf(self, img, force=False):
216
+ """ img is:
217
+ - an image
218
+ - a tuple (image, trf)
219
+ - a tuple (image, (cur_trf, trf_todo))
220
+ In any case, trf: cur_pix --> old_pix
221
+ """
222
+ img, trf = img if isinstance(img, tuple) else (img, torch.eye(3, device=img.device))
223
+
224
+ if isinstance(trf, tuple):
225
+ trf, todo = trf
226
+ if isinstance(todo, (int,float)): # pure rotation
227
+ img, trf = myF.rotate_img((img,trf), angle=todo, crop=self.crop_rot)
228
+ else:
229
+ img = myF.apply_trf_to_img(todo, img)
230
+ trf = trf @ todo
231
+ return img, trf
232
+
233
+
234
+ class Main (tss.Main):
235
+ @staticmethod
236
+ def get_options( args ):
237
+ return dict(max_scale=args.max_scale, min_scale=args.min_scale,
238
+ max_rot=args.max_rot, min_rot=args.min_rot, rot_step=args.rot_step,
239
+ swap_mode=args.no_swap, same_levels=args.same_levels, crop_rot=args.crop_rot)
240
+
241
+ @staticmethod
242
+ def tune_matcher( args, matcher, device ):
243
+ if device == 'cpu':
244
+ args.merge = 'cpu'
245
+
246
+ if args.merge == 'cpu': type(matcher).merge_corres = myF.merge_corres; matcher.merge_device = 'cpu'
247
+ elif args.merge == 'cuda': type(matcher).merge_corres = myF.merge_corres
248
+
249
+ return matcher.to(device)
250
+
251
+ @staticmethod
252
+ def build_matcher( args, device):
253
+ # get a normal matcher
254
+ matcher = tss.Main.build_matcher(args, device)
255
+ type(matcher).demultiplex_img_trf = demultiplex_img_trf # update transformer
256
+
257
+ options = Main.get_options(args)
258
+ return Main.tune_matcher(args, MultiScalePUMP(matcher, **options), device)
259
+
260
+
261
+ if __name__ == '__main__':
262
+ Main().run_from_args(arg_parser().parse_args())
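To make the defaults above concrete, here is the scale grid that _enum_scales() walks with --max-scale 4 (--min-scale then defaults to 1/4); rotations stay at 0 unless --max-rot is raised. A small sketch mirroring that loop:

import numpy as np

min_sc, max_sc = 0.25, 4.0
scales = [2**(i/2) for i in range(-100, 101) if min_sc <= 2**(i/2) <= max_sc]
print(np.round(scales, 3))
# -> [0.25, 0.354, 0.5, 0.707, 1.0, 1.414, 2.0, 2.828, 4.0]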
test_multiscale_recursive.py ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ import test_singlescale as ss
6
+ import test_singlescale_recursive as ssr
7
+ import test_multiscale as ms
8
+
9
+ def arg_parser():
10
+ parser = ssr.arg_parser(ms.arg_parser())
11
+ return parser
12
+
13
+ class Main (ms.Main):
14
+ @staticmethod
15
+ def build_matcher(args, device):
16
+ # get a single-scale recursive matcher
17
+ matcher = ssr.Main.build_matcher(args, device)
18
+ type(matcher).demultiplex_img_trf = ms.demultiplex_img_trf # update transformer
19
+
20
+ options = Main.get_options(args)
21
+ return Main.tune_matcher(args, ms.MultiScalePUMP(matcher, **options), device).to(device)
22
+
23
+ if __name__ == '__main__':
24
+ Main().run_from_args(arg_parser().parse_args())
test_singlescale.py ADDED
@@ -0,0 +1,284 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+
11
+ from core import functional as myF
12
+ from core.pixel_desc import PixelDesc
13
+ from tools.common import mkdir_for, todevice, cudnn_benchmark, nparray, image, image_with_trf
14
+ from tools.viz import dbgfig, show_correspondences
15
+
16
+
17
+ def arg_parser():
18
+ import argparse
19
+ parser = argparse.ArgumentParser('SingleScalePUMP on GPU with PyTorch')
20
+
21
+ parser.add_argument('--img1', required=True, help='path to img1')
22
+ parser.add_argument('--img2', required=True, help='path to img2')
23
+ parser.add_argument('--resize', type=int, default=512, nargs='+', help='prior downsize of img1 and img2')
24
+
25
+ parser.add_argument('--output', default=None, help='output path for correspondences')
26
+
27
+ parser.add_argument('--levels', type=int, default=99, help='number of pyramid levels')
28
+ parser.add_argument('--min-shape', type=int, default=5, help='minimum size of corr maps')
29
+ parser.add_argument('--nlpow', type=float, default=1.5, help='non-linear activation power in [1,2]')
30
+ parser.add_argument('--border', type=float, default=0.9, help='border invariance level in [0,1]')
31
+ parser.add_argument('--dtype', default='float16', choices='float16 float32 float64'.split())
32
+
33
+ parser.add_argument('--desc', default='PUMP-stytrf', help='checkpoint name')
34
+ parser.add_argument('--first-level', choices='torch'.split(), default='torch')
35
+ parser.add_argument('--activation', choices='torch'.split(), default='torch')
36
+ parser.add_argument('--forward', choices='torch cuda cuda-lowmem'.split(), default='cuda-lowmem')
37
+ parser.add_argument('--backward', choices='python torch cuda'.split(), default='cuda')
38
+ parser.add_argument('--reciprocal', choices='cpu cuda'.split(), default='cpu')
39
+
40
+ parser.add_argument('--post-filter', default=None, const=True, nargs='?', help='post-filtering (See post_filter.py)')
41
+
42
+ parser.add_argument('--verbose', type=int, default=0, help='verbosity')
43
+ parser.add_argument('--device', default='cuda', help='gpu device')
44
+ parser.add_argument('--dbg', nargs='*', default=(), help='debug options')
45
+
46
+ return parser
47
+
48
+
49
+ class SingleScalePUMP (nn.Module):
50
+ def __init__(self, levels = 9, nlpow = 1.4, cutoff = 1,
51
+ border_inv=0.9, min_shape=5, renorm=(),
52
+ pixel_desc = None, dtype = torch.float32,
53
+ verbose = True ):
54
+ super().__init__()
55
+ self.levels = levels
56
+ self.min_shape = min_shape
57
+ self.nlpow = nlpow
58
+ self.border_inv = border_inv
59
+ assert pixel_desc, 'Requires a pixel descriptor'
60
+ self.pixel_desc = pixel_desc.configure(self)
61
+ self.dtype = dtype
62
+ self.verbose = verbose
63
+
64
+ @torch.no_grad()
65
+ def forward(self, img1, img2, ret='corres', dbg=()):
66
+ with cudnn_benchmark(False):
67
+ # compute descriptors
68
+ (img1, img2), pixel_descs, trfs = self.extract_descs(img1, img2, dtype=self.dtype)
69
+
70
+ # backward and forward passes
71
+ pixel_corr = self.first_level(*pixel_descs, dbg=dbg)
72
+ pixel_corr = self.backward_pass(self.forward_pass(pixel_corr, dbg=dbg), dbg=dbg)
73
+
74
+ # recover correspondences
75
+ corres = myF.best_correspondences( pixel_corr )
76
+
77
+ if dbgfig('corres', dbg): viz_correspondences(img1[0], img2[0], *corres, fig='last')
78
+ corres = [(myF.affmul(trfs,pos),score) for pos, score in corres] # rectify scaling etc.
79
+ if ret == 'raw': return corres, trfs
80
+ return self.reciprocal(*corres)
81
+
82
+ def extract_descs(self, img1, img2, dtype=None):
83
+ img1, sca1 = self.demultiplex_img_trf(img1)
84
+ img2, sca2 = self.demultiplex_img_trf(img2)
85
+ desc1, trf1 = self.pixel_desc(img1)
86
+ desc2, trf2 = self.pixel_desc(img2)
87
+ return (img1, img2), (desc1.type(dtype), desc2.type(dtype)), (sca1@trf1, sca2@trf2)
88
+
89
+ def demultiplex_img_trf(self, img, **kw):
90
+ return img if isinstance(img, tuple) else (img, torch.eye(3, device=img.device))
91
+
92
+ def forward_pass(self, pixel_corr, dbg=()):
93
+ weights = None
94
+ if isinstance(pixel_corr, tuple):
95
+ pixel_corr, weights = pixel_corr
96
+
97
+ # first-level with activation
98
+ if self.verbose: print(f' Pyramid level {0} shape={tuple(pixel_corr.shape)}')
99
+ pyramid = [ self.activation(0,pixel_corr) ]
100
+ if dbgfig(f'corr0', dbg): viz_correlation_maps(*from_stack('img1','img2'), pyramid[0], fig='last')
101
+
102
+ for level in range(1, self.levels+1):
103
+ upper, weights = self.forward_level(level, pyramid[-1], weights)
104
+ if weights.sum() == 0: break # img1 has become too small
105
+
106
+ # activation
107
+ pyramid.append( self.activation(level,upper) )
108
+
109
+ if self.verbose: print(f' Pyramid level {level} shape={tuple(upper.shape)}')
110
+ if dbgfig(f'corr{level}', dbg): viz_correlation_maps(*from_stack('img1','img2'), upper, level=level, fig='last')
111
+ if min(upper.shape[-2:]) <= self.min_shape: break # img2 has become too small
112
+
113
+ return pyramid
114
+
115
+ def forward_level(self, level, corr, weights):
116
+ # max-pooling
117
+ pooled = F.max_pool2d(corr, 3, padding=1, stride=2)
118
+
119
+ # sparse conv
120
+ return myF.sparse_conv(level, pooled, weights, norm=self.border_inv)
121
+
122
+ def backward_pass(self, pyramid, dbg=()):
123
+ # same as the forward pass, in reverse order
124
+ for level in range(len(pyramid)-1, 0, -1):
125
+ lower = self.backward_level(level, pyramid)
126
+ # assert not torch.isnan(lower).any(), bb()
127
+ if self.verbose: print(f' Pyramid level {level-1} shape={tuple(lower.shape)}')
128
+ del pyramid[-1] # free memory
129
+ if dbgfig(f'corr{level}-bw', dbg): viz_correlation_maps(img1, img2, lower, fig='last')
130
+ return pyramid[0]
131
+
132
+ def backward_level(self, level, pyramid):
133
+ # reverse sparse-conv
134
+ pooled = myF.sparse_conv(level, pyramid[level], reverse=True)
135
+
136
+ # reverse max-pool and add to lower level
137
+ return myF.max_unpool(pooled, pyramid[level-1])
138
+
139
+ def activation(self, level, corr):
140
+ assert 1 <= self.nlpow <= 3
141
+ corr.clamp_(min=0).pow_(self.nlpow)
142
+ return corr
143
+
144
+ def first_level(self, desc1, desc2, dbg=()):
145
+ assert desc1.ndim == desc2.ndim == 4
146
+ assert len(desc1) == len(desc2) == 1, "not implemented"
147
+ H1, W1 = desc1.shape[-2:]
148
+ H2, W2 = desc2.shape[-2:]
149
+
150
+ patches = F.unfold(desc1, 4, stride=4) # C*4*4, H1*W1//16
151
+ B, C, N = patches.shape
152
+ # rearrange(patches, 'B (C Kh Kw) H1W1 -> B H1W1 C Kh Kw', Kh=4, Kw=4)
153
+ patches = patches.permute(0, 2, 1).view(B, N, C//16, 4, 4)
154
+
155
+ corr, norms = myF.normalized_corr(patches[0], desc2[0], ret_norms=True)
156
+ if dbgfig('ncc',dbg):
157
+ for j in range(0,len(corr),9):
158
+ for i in range(9):
159
+ pl.subplot(3,3,i+1).cla()
160
+ i += j
161
+ pl.imshow(corr[i], vmin=0.9, vmax=1)
162
+ pl.plot(2+(i%16)*4, 2+(i//16)*4,'xr', ms=10)
163
+ bb()
164
+ return corr.view(H1//4, W1//4, H2+1, W2+1), (norms.view(H1//4, W1//4)>0).float()
165
+
166
+ def reciprocal(self, corres1, corres2 ):
167
+ corres1, corres2 = todevice(corres1, 'cpu'), todevice(corres2, 'cpu')
168
+ return myF.reciprocal(self, corres1, corres2)
169
+
170
+
171
+ class Main:
172
+ def __init__(self):
173
+ self.post_filtering = False
174
+
175
+ def run_from_args(self, args):
176
+ device = args.device
177
+ self.matcher = self.build_matcher(args, device)
178
+ if args.post_filter:
179
+ self.post_filtering = {} if args.post_filter is True else eval(f'dict({args.post_filter})')
180
+
181
+ corres = self(*self.load_images(args, device), dbg=set(args.dbg))
182
+
183
+ if args.output:
184
+ self.save_output( args.output, corres )
185
+
186
+ def run_from_args_with_images(self, img1, img2, args):
187
+ device = args.device
188
+ self.matcher = self.build_matcher(args, device)
189
+ if args.post_filter:
190
+ self.post_filtering = {} if args.post_filter is True else eval(f'dict({args.post_filter})')
191
+
192
+ if isinstance(args.resize, int): # user can provide 2 separate sizes for each image
193
+ args.resize = (args.resize, args.resize)
194
+
195
+ if len(args.resize) == 1:
196
+ args.resize = 2 * args.resize
197
+
198
+ images = []
199
+ for imgx, size in zip([img1, img2], args.resize):
200
+ img = torch.from_numpy(np.array(imgx.convert('RGB'))).permute(2,0,1).to(device)
201
+ img = myF.imresize(img, size)
202
+ images.append( img )
203
+
204
+ corres = self(*images, dbg=set(args.dbg))
205
+
206
+ if args.output:
207
+ self.save_output( args.output, corres )
208
+
209
+ return corres
210
+
211
+
212
+ @staticmethod
213
+ def get_options( args ):
214
+ # configure the pipeline
215
+ pixel_desc = PixelDesc(path=f'checkpoints/{args.desc}.pt')
216
+ return dict(levels=args.levels, min_shape=args.min_shape, border_inv=args.border, nlpow=args.nlpow,
217
+ pixel_desc=pixel_desc, dtype=eval(f'torch.{args.dtype}'), verbose=args.verbose)
218
+
219
+ @staticmethod
220
+ def tune_matcher( args, matcher, device ):
221
+ if device == 'cpu':
222
+ matcher.dtype = torch.float32
223
+ args.forward = 'torch'
224
+ args.backward = 'torch'
225
+ args.reciprocal = 'cpu'
226
+
227
+ if args.forward == 'cuda': type(matcher).forward_level = myF.forward_cuda
228
+ if args.forward == 'cuda-lowmem':type(matcher).forward_level = myF.forward_cuda_lowmem
229
+ if args.backward == 'python': type(matcher).backward_pass = legacy.backward_python
230
+ if args.backward == 'cuda': type(matcher).backward_level = myF.backward_cuda
231
+ if args.reciprocal == 'cuda': type(matcher).reciprocal = myF.reciprocal
232
+
233
+ return matcher.to(device)
234
+
235
+ @staticmethod
236
+ def build_matcher(args, device):
237
+ options = Main.get_options(args)
238
+ matcher = SingleScalePUMP(**options)
239
+ return Main.tune_matcher(args, matcher, device)
240
+
241
+ def __call__(self, *imgs, dbg=()):
242
+ corres = self.matcher( *imgs, dbg=dbg).cpu().numpy()
243
+ if self.post_filtering is not False:
244
+ corres = self.post_filter( imgs, corres )
245
+
246
+ if 'print' in dbg: print(corres)
247
+ if dbgfig('viz',dbg): show_correspondences(*imgs, corres)
248
+ return corres
249
+
250
+ @staticmethod
251
+ def load_images( args, device='cpu' ):
252
+ def read_image(impath):
253
+ try:
254
+ from torchvision.io.image import read_image, ImageReadMode
255
+ return read_image(impath, mode=ImageReadMode.RGB)
256
+ except RuntimeError:
257
+ from PIL import Image
258
+ return torch.from_numpy(np.array(Image.open(impath).convert('RGB'))).permute(2,0,1)
259
+
260
+ if isinstance(args.resize, int): # user can provide 2 separate sizes for each image
261
+ args.resize = (args.resize, args.resize)
262
+
263
+ if len(args.resize) == 1:
264
+ args.resize = 2 * args.resize
265
+
266
+ images = []
267
+ for impath, size in zip([args.img1, args.img2], args.resize):
268
+ img = read_image(impath).to(device)
269
+ img = myF.imresize(img, size)
270
+ images.append( img )
271
+ return images
272
+
273
+ def post_filter(self, imgs, corres ):
274
+ from post_filter import filter_corres
275
+ return filter_corres(*map(image_with_trf,imgs), corres, **self.post_filtering)
276
+
277
+ def save_output(self, output_path, corres ):
278
+ mkdir_for( output_path )
279
+ np.savez(open(output_path,'wb'), corres=corres)
280
+
281
+
282
+
283
+ if __name__ == '__main__':
284
+ Main().run_from_args(arg_parser().parse_args())
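A short sketch of reading back the file written by Main.save_output(); the path is whatever was passed to --output (hypothetical below), and the column layout follows the correspondence arrays used above:

import numpy as np

corres = np.load('results/pair.corres')['corres']   # hypothetical --output path
# each row is typically (x1, y1, x2, y2, score, scale-rot code)
x1, y1, x2, y2 = corres[:, :4].T
print(corres.shape, 'score range:', corres[:, 4].min(), corres[:, 4].max())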
test_singlescale_recursive.py ADDED
@@ -0,0 +1,156 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ from tqdm import tqdm
7
+ import numpy as np
8
+ import torch
9
+
10
+ import test_singlescale as tss
11
+ import core.functional as myF
12
+ from tools.viz import dbgfig, show_correspondences
13
+
14
+
15
+ def arg_parser(parser = None):
16
+ parser = parser or tss.arg_parser()
17
+
18
+ parser.add_argument('--rec-overlap', type=float, default=0.5, help='overlap between tiles in [0,0.5]')
19
+ parser.add_argument('--rec-score-thr', type=float, default=1, help='corres score threshold to guide fine levels')
20
+ parser.add_argument('--rec-fast-thr', type=float, default=0.1, help='prune block if less than `fast` corres fall in it')
21
+
22
+ return parser
23
+
24
+
25
+ class RecursivePUMP (tss.SingleScalePUMP):
26
+ """ Recursive PUMP:
27
+ 1) find initial correspondences at a coarse scale,
28
+ 2) refine them at a selection of finer scales
29
+ """
30
+ def __init__(self, coarse_size=512, fine_size=512, rec_overlap=0.5, rec_score_thr=1.0,
31
+ rec_fast_thr = 0.1, **other_options ):
32
+ super().__init__(**other_options)
33
+ assert 10 < coarse_size < 1024
34
+ assert 10 < fine_size < 1024
35
+ assert 0 <= rec_overlap < 1
36
+ assert 0 < rec_fast_thr < 1
37
+ self.coarse_size = coarse_size
38
+ self.fine_size = fine_size
39
+ self.overlap = rec_overlap
40
+ self.score_thr = rec_score_thr
41
+ self.fast_thr = rec_fast_thr
42
+
43
+ @torch.no_grad()
44
+ def forward(self, img1, img2, ret='corres', dbg=()):
45
+ img1, sca1 = self.demultiplex_img_trf(img1, force=True)
46
+ img2, sca2 = self.demultiplex_img_trf(img2, force=True)
47
+ input_trfs = (sca1, sca2)
48
+
49
+ # coarse first level with low-res images
50
+ corres = self.coarse_correspondences(img1, img2)
51
+
52
+ # fine level: iterate on HQ blocks
53
+ accu1, accu2 = (self._make_accu(img1), self._make_accu(img2))
54
+ for block1, block2 in tqdm(list(self._enumerate_blocks(img1, img2, corres))):
55
+ # print(f"img1[{block1[}:{}, {}:{}]"
56
+ accus, trfs = tss.SingleScalePUMP.forward(self, block1, block2, ret='raw', dbg=dbg)
57
+ self._update_accu( accu1, accus[0], trfs[0][:2,2] )
58
+ self._update_accu( accu2, accus[1], trfs[1][:2,2] )
59
+
60
+ demul = lambda accu: (accu[:,:,:4].reshape(-1,4).clone(), accu[:,:,4].clone())
61
+ corres = demul(accu1), demul(accu2)
62
+ if dbgfig('corres', dbg): viz_correspondences(img1, img2, *corres, fig='last')
63
+ corres = [(myF.affmul(input_trfs,pos),score) for pos, score in corres] # rectify scaling etc.
64
+ if ret == 'raw': return corres, input_trfs
65
+ return self.reciprocal(*corres)
66
+
67
+ def coarse_correspondences(self, img1, img2, **kw):
68
+ # joint image resize, because relative size is important (multiscale)
69
+ shape1, shape2 = img1.shape[-2:], img2.shape[-2:]
70
+ if max(shape1 + shape2) > self.coarse_size:
71
+ f1 = self.coarse_size / max(shape1)
72
+ f2 = self.coarse_size / max(shape2)
73
+ f = min(f1, f2)
74
+ img1 = myF.imresize( img1, int(0.5+f*max(shape1)) )
75
+ img2 = myF.imresize( img2, int(0.5+f*max(shape2)) )
76
+ else:
77
+ f = 1
78
+
79
+ init_corres = tss.SingleScalePUMP.forward(self, img1, img2, **kw)
80
+ # show_correspondences(img1, img2, init_corres, fig='last')
81
+ corres = init_corres[init_corres[:,4] > self.score_thr]
82
+ print(f" keeping {len(corres)}/{len(init_corres)} corres with score > {self.score_thr} ...")
83
+ return corres
84
+
85
+ def _update_accu(self, accu, update, offset ):
86
+ pos, scores = update
87
+ H, W = scores.shape
88
+ offx, offy = map(lambda i: int(i/4), offset)
89
+ accu = accu[offy:offy+H, offx:offx+W]
90
+ better = accu[:,:,4] < scores
91
+ accu[:,:,4][better] = scores[better].float()
92
+ accu[:,:,0:4][better] = pos.reshape(H,W,4)[better]
93
+
94
+ def _enumerate_blocks(self, img1, img2, corres):
95
+ H1, W1, H2, W2 = img1.shape[1:] + img2.shape[1:]
96
+ size, step = self.fine_size, int(self.overlap * self.fine_size)
97
+ def regular_steps(size):
98
+ if size <= self.fine_size: return [0]
99
+ nb = int(np.ceil(size / step)) - 1 # guaranteed >= 1
100
+ return (np.linspace(0, size-self.fine_size, nb) / 4 + 0.5).astype(int) * 4
101
+ def translation(x,y):
102
+ res = torch.eye(3, device=img1.device)
103
+ res[0,2] = x
104
+ res[1,2] = y
105
+ return res
106
+ def block2(x2,y2):
107
+ return img2[:,y2:y2+size,x2:x2+size], translation(x2,y2)
108
+ cx1, cy1 = corres[:,0:2].T
109
+
110
+ for y1 in regular_steps(H1):
111
+ for x1 in regular_steps(W1):
112
+ block1 = (img1[:,y1:y1+size,x1:x1+size], translation(x1,y1))
113
+ c2 = corres[(y1<=cy1) & (cy1<y1+size) & (x1<=cx1) & (cx1<x1+size)]
114
+ nb_init = len(c2)
115
+ while len(c2):
116
+ cx2, cy2 = c2[:,2:4].T
117
+ x2, y2 = (int(max(0,min(W2-size,cx2.median()-size//2)) / 4 + 0.5) * 4,
118
+ int(max(0,min(H2-size,cy2.median()-size//2)) / 4 + 0.5) * 4)
119
+ inside = (y2<=cy2) & (cy2<y2+size) & (x2<=cx2) & (cx2<x2+size)
120
+ if not inside.any():
121
+ x2, y2 = c2[np.random.choice(len(c2)),2:4]
122
+ x2 = int(max(0,min(W2-size,x2-size//2)) / 4 + 0.5) * 4
123
+ y2 = int(max(0,min(H2-size,y2-size//2)) / 4 + 0.5) * 4
124
+ inside = (y2<=cy2) & (cy2<y2+size) & (x2<=cx2) & (cx2<x2+size)
125
+
126
+ if inside.sum()/nb_init >= self.fast_thr:
127
+ yield block1, block2(x2,y2)
128
+
129
+ c2 = c2[~inside] # remove
130
+
131
+ def _make_accu(self, img):
132
+ C, H, W = img.shape
133
+ return img.new_zeros(((H+3)//4, (W+3)//4, 5), dtype=torch.float32)
134
+
135
+
136
+
137
+ class Main (tss.Main):
138
+ @staticmethod
139
+ def build_matcher(args, device):
140
+ # set coarse and fine size based on now obsolete --resize argument
141
+ if isinstance(args.resize, int): args.resize = [args.resize]
142
+ if len(args.resize) == 1: args.resize *= 2
143
+ args.rec_coarse_size, args.rec_fine_size = args.resize
144
+ args.resize = 0 # disable it so that image loading does not downsize images
145
+
146
+ options = Main.get_options( args )
147
+
148
+ matcher = RecursivePUMP( coarse_size=args.rec_coarse_size, fine_size=args.rec_fine_size,
149
+ rec_overlap=args.rec_overlap, rec_score_thr=args.rec_score_thr, rec_fast_thr=args.rec_fast_thr,
150
+ **options)
151
+
152
+ return tss.Main.tune_matcher(args, matcher, device)
153
+
154
+
155
+ if __name__ == '__main__':
156
+ Main().run_from_args(arg_parser().parse_args())
tools/common.py ADDED
@@ -0,0 +1,95 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ import os
6
+ import torch
7
+ import numpy as np
8
+
9
+
10
+ def mkdir_for(file_path):
11
+ dirname = os.path.split(file_path)[0]
12
+ if dirname: os.makedirs(dirname, exist_ok=True)
13
+ return file_path
14
+
15
+
16
+ def model_size(model):
17
+ ''' Computes the number of parameters of the model
18
+ '''
19
+ size = 0
20
+ for weights in model.state_dict().values():
21
+ size += np.prod(weights.shape)
22
+ return size
23
+
24
+
25
+ class cudnn_benchmark:
26
+ " context manager to temporarily disable cudnn benchmark "
27
+ def __init__(self, activate ):
28
+ self.activate = activate
29
+ def __enter__(self):
30
+ self.old_bm = torch.backends.cudnn.benchmark
31
+ torch.backends.cudnn.benchmark = self.activate
32
+ def __exit__(self, *args):
33
+ torch.backends.cudnn.benchmark = self.old_bm
34
+
35
+
36
+ def todevice(x, device, non_blocking=False):
37
+ """ Transfer some variables to another device (i.e. GPU, CPU:torch, CPU:numpy).
38
+ x: array, tensor, or container of such.
39
+ device: pytorch device or 'numpy'
40
+ """
41
+ if isinstance(x, dict):
42
+ return {k:todevice(v, device) for k,v in x.items()}
43
+
44
+ if isinstance(x, (tuple,list)):
45
+ return type(x)(todevice(e, device) for e in x)
46
+
47
+ if device == 'numpy':
48
+ if isinstance(x, torch.Tensor):
49
+ x = x.detach().cpu().numpy()
50
+ elif x is not None:
51
+ if isinstance(x, np.ndarray):
52
+ x = torch.from_numpy(x)
53
+ x = x.to(device, non_blocking=non_blocking)
54
+ return x
55
+
56
+ def nparray( x ): return todevice(x, 'numpy')
57
+ def cpu( x ): return todevice(x, 'cpu')
58
+ def cuda( x ): return todevice(x, 'cuda')
59
+
60
+
61
+ def image( img, with_trf=False ):
62
+ " convert a torch.Tensor to a numpy image (H, W, 3) "
63
+ def convert_image(img):
64
+ if isinstance(img, torch.Tensor):
65
+ if img.dtype is not torch.uint8:
66
+ img = img * 255
67
+ if img.min() < -10:
68
+ img = img.clone()
69
+ for i, (mean, std) in enumerate(zip([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])):
70
+ img[i] *= std
71
+ img[i] += 255*mean
72
+ img = img.byte()
73
+ if img.shape[0] <= 3:
74
+ img = img.permute(1,2,0)
75
+ return img
76
+
77
+ if isinstance(img, tuple):
78
+ if with_trf:
79
+ return nparray(convert_image(img[0])), nparray(img[1])
80
+ else:
81
+ img = img[0]
82
+ return nparray(convert_image(img))
83
+
84
+
85
+ def image_with_trf( img ):
86
+ return image(img, with_trf=True)
87
+
88
+ class ToTensor:
89
+ " numpy images to float tensors "
90
+ def __call__(self, x):
91
+ assert x.ndim == 4 and x.shape[3] == 3
92
+ if isinstance(x, np.ndarray):
93
+ x = torch.from_numpy(x)
94
+ assert x.dtype == torch.uint8
95
+ return x.permute(0, 3, 1, 2).float() / 255
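A small sketch of how todevice() and its shortcuts walk nested containers while preserving their structure (the tensors are placeholders):

import torch
from tools.common import todevice, nparray, cpu

batch = {'img': torch.zeros(1, 3, 8, 8), 'meta': [torch.ones(2), torch.arange(3)]}
batch_np  = nparray(batch)              # same structure, tensors converted to numpy arrays
batch_cpu = cpu(batch)                  # same structure, tensors moved to CPU
# batch_gpu = todevice(batch, 'cuda')   # same structure, tensors moved to the GPU (if available)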
tools/trainer.py ADDED
@@ -0,0 +1,125 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ import pdb; bb = pdb.set_trace
6
+ from tqdm import tqdm
7
+ from collections import defaultdict
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ from torch.nn import DataParallel
12
+
13
+ from .common import todevice
14
+
15
+
16
+ class Trainer (nn.Module):
17
+ """ Helper class to train a deep network.
18
+ Overload this class `forward_backward` for your actual needs.
19
+
20
+ Usage:
21
+ train = Trainer(net, loss, optimizer)
22
+ for epoch in range(n_epochs):
23
+ train(data_loader)
24
+ """
25
+ def __init__(self, net, loss, optimizer, epoch=0):
26
+ super().__init__()
27
+ self.net = net
28
+ self.loss = loss
29
+ self.optimizer = optimizer
30
+ self.epoch = epoch
31
+
32
+ @property
33
+ def device(self):
34
+ return next(self.net.parameters()).device
35
+
36
+ @property
37
+ def model(self):
38
+ return self.net.module if isinstance(self.net, DataParallel) else self.net
39
+
40
+ def distribute(self):
41
+ self.net = DataParallel(self.net) # DataDistributed not implemented yet
42
+
43
+ def __call__(self, data_loader):
44
+ print(f'>> Training (epoch {self.epoch} --> {self.epoch+1})')
45
+ self.net.train()
46
+
47
+ stats = defaultdict(list)
48
+
49
+ for batch in tqdm(data_loader):
50
+ batch = todevice(batch, self.device)
51
+
52
+ # compute gradient and do model update
53
+ self.optimizer.zero_grad()
54
+ details = self.forward_backward(batch)
55
+ self.optimizer.step()
56
+
57
+ for key, val in details.items():
58
+ stats[key].append( val )
59
+
60
+ self.epoch += 1
61
+
62
+ print(" Summary of losses during this epoch:")
63
+ for loss_name, vals in stats.items():
64
+ N = 1 + len(vals)//10
65
+ print(f" - {loss_name:10}: {avg(vals[:N]):.3f} --> {avg(vals[-N:]):.3f} (avg: {avg(vals):.3f})")
66
+
67
+ def forward_backward(self, inputs):
68
+ raise NotImplementedError()
69
+
70
+ def save(self, path):
71
+ print(f"\n>> Saving model to {path}")
72
+
73
+ data = {'model': self.model.state_dict(),
74
+ 'optimizer': self.optimizer.state_dict(),
75
+ 'loss': self.loss.state_dict(),
76
+ 'epoch': self.epoch}
77
+
78
+ torch.save(data, open(path,'wb'))
79
+
80
+ def load(self, path, resume=True):
81
+ print(f">> Loading weights from {path} ...")
82
+ checkpoint = torch.load(path, map_location='cpu')
83
+ assert isinstance(checkpoint, dict)
84
+
85
+ self.net.load_state_dict(checkpoint['model'])
86
+ if resume:
87
+ self.optimizer.load_state_dict(checkpoint['optimizer'])
88
+ self.loss.load_state_dict(checkpoint['loss'])
89
+ self.epoch = checkpoint['epoch']
90
+ print(f" Resuming training at Epoch {self.epoch}!")
91
+
92
+
93
+ def get_loss( loss ):
94
+ """ returns a tuple (loss, dictionary of loss details)
95
+ """
96
+ assert isinstance(loss, dict)
97
+ grads = None
98
+
99
+ k,l = next(iter(loss.items())) # first item is assumed to be the main loss
100
+ if isinstance(l, tuple):
101
+ l, grads = l
102
+ loss[k] = l
103
+
104
+ return (l, grads), {k:float(v) for k,v in loss.items()}
105
+
106
+
107
+ def backward( loss ):
108
+ if isinstance(loss, tuple):
109
+ loss, grads = loss
110
+ else:
111
+ loss, grads = (loss, None)
112
+
113
+ assert loss == loss, 'loss is NaN'
114
+
115
+ if grads is None:
116
+ loss.backward()
117
+ else:
118
+ # dictionary of separate subgraphs
119
+ for var,grad in grads:
120
+ var.backward(grad)
121
+ return float(loss)
122
+
123
+
124
+ def avg( lis ):
125
+ return sum(lis) / len(lis)
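As the docstring suggests, forward_backward() is meant to be overloaded; a minimal sketch of a subclass, where the network, loss and batch keys are placeholders, relying on the get_loss() and backward() helpers defined above:

class MyTrainer (Trainer):
    def forward_backward(self, batch):
        pred = self.net(batch['input'])                 # hypothetical batch layout
        loss_dict = self.loss(pred, batch['target'])    # assumed to return a dict of named losses
        loss, details = get_loss(loss_dict)             # loss may be (value, grads)
        backward(loss)                                  # handles both plain losses and (value, grads)
        return details                                  # dict of floats, accumulated by Trainer.__call__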
tools/viz.py ADDED
@@ -0,0 +1,266 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ import sys
6
+ from pdb import set_trace as bb
7
+ from PIL import Image
8
+ import numpy as np
9
+
10
+ import matplotlib.pyplot as pl; pl.ion()
11
+ import torch
12
+ import torch.nn.functional as F
13
+
14
+ from core import functional as myF
15
+ from .common import cpu, nparray, image, image_with_trf
16
+
17
+
18
+ def dbgfig(*args, **kwargs):
19
+ assert len(args) >= 2
20
+ dbg = args[-1]
21
+ if isinstance(dbg, str):
22
+ dbg = dbg.split()
23
+ for name in args[:-1]:
24
+ if {name,'all'} & set(dbg):
25
+ return pl.figure(name, **kwargs)
26
+ return False
27
+
28
+
29
+ def noticks(ax=None):
30
+ if ax is None: ax = pl.gca()
31
+ ax.set_xticks(())
32
+ ax.set_yticks(())
33
+ return ax
34
+
35
+
36
+ def plot_grid( corres, ax1, ax2=None, marker='+' ):
37
+ """ corres = Nx2 or Nx4 list of correspondences
38
+ """
39
+ if marker is True: marker = '+'
40
+
41
+ corres = nparray(corres)
42
+ # make beautiful colors
43
+ center = corres[:,[1,0]].mean(axis=0)
44
+ colors = np.arctan2(*(corres[:,[1,0]] - center).T)
45
+ colors = np.int32(64*colors/np.pi) % 128
46
+
47
+ all_colors = np.unique(colors)
48
+ palette = {m:pl.cm.hsv(i/float(len(all_colors))) for i,m in enumerate(all_colors)}
49
+
50
+ for m in all_colors:
51
+ x, y = corres[colors==m,0:2].T
52
+ ax1.plot(x, y, marker, ms=10, mew=2, color=palette[m], scalex=0, scaley=0)
53
+
54
+ if not ax2: return
55
+ for m in all_colors:
56
+ x, y = corres[colors==m,2:4].T
57
+ ax2.plot(x, y, marker, ms=10, mew=2, color=palette[m], scalex=0, scaley=0)
58
+
59
+
60
+ def show_correspondences( img0, img1, corres, F=None, fig='last', show_grid=True, bb=None, clf=False):
61
+ img0, trf0 = img0 if isinstance(img0, tuple) else (img0, torch.eye(3))
62
+ img1, trf1 = img1 if isinstance(img1, tuple) else (img1, torch.eye(3))
63
+ if not bb: pl.ioff()
64
+ fig, axes = pl.subplots(2, 2, num=fig_num(fig, 'viz_corres'))
65
+ for i, ax in enumerate(axes.ravel()):
66
+ if clf: ax.cla()
67
+ noticks(ax).numaxis = i % 2
68
+ ax.imshow( [image(img0),image(img1)][i%2] )
69
+
70
+ if corres.shape == (3,3): # corres is a homography matrix
71
+ from pytools.hfuncs import applyh
72
+ H, W = axes[0,0].images[0].get_size()
73
+ pos1 = np.mgrid[:H,:W].reshape(2,-1)[::-1].T
74
+ pos2 = applyh(corres, pos1)
75
+ corres = np.concatenate((pos1,pos2), axis=-1)
76
+
77
+ inv = np.linalg.inv
78
+ corres = myF.affmul((inv(nparray(trf0)),inv(nparray(trf1))), nparray(corres)) # images are already downscaled
79
+ print(f">> Displaying {len(corres)} correspondences (move you mouse over the images)")
80
+
81
+ (ax1, ax2), (ax3, ax4) = axes
82
+ if corres.shape[-1] > 4:
83
+ corres = corres[corres[:,4]>0,:] # select non-null correspondences
84
+ if show_grid: plot_grid(corres, ax3, ax4, marker=show_grid)
85
+
86
+ def mouse_move(event):
87
+ if event.inaxes==None: return
88
+ numaxis = event.inaxes.numaxis
89
+ if numaxis<0: return
90
+ x,y = event.xdata, event.ydata
91
+ ax1.lines.clear()
92
+ ax2.lines.clear()
93
+ sl = slice(2*numaxis, 2*(numaxis+1))
94
+ n = np.sum((corres[:,sl] - [x,y])**2,axis=1).argmin() # find nearest point
95
+ print("\rdisplaying #%d (%d,%d) --> (%d,%d), score=%g, code=%g" % (n,
96
+ corres[n,0],corres[n,1],corres[n,2],corres[n,3],
97
+ corres[n,4] if corres.shape[-1] > 4 else np.nan,
98
+ corres[n,5] if corres.shape[-1] > 5 else np.nan), end=' '*7);sys.stdout.flush()
99
+ x,y = corres[n,0:2]
100
+ ax1.plot(x, y, '+', ms=10, mew=2, color='blue', scalex=False, scaley=False)
101
+ x,y = corres[n,2:4]
102
+ ax2.plot(x, y, '+', ms=10, mew=2, color='red', scalex=False, scaley=False)
103
+ if F is not None:
104
+ ax = None
105
+ if numaxis == 0:
106
+ line = corres[n,0:2] @ F[:2] + F[2]
107
+ ax = ax2
108
+ if numaxis == 1:
109
+ line = corres[n,2:4] @ F.T[:2] + F.T[2]
110
+ ax = ax1
111
+ if ax:
112
+ x = np.linspace(-10000,10000,2)
113
+ y = (line[2]+line[0]*x) / -line[1]
114
+ ax.plot(x, y, '-', scalex=0, scaley=0)
115
+
116
+ # we redraw only the concerned axes
117
+ renderer = fig.canvas.get_renderer()
118
+ ax1.draw(renderer)
119
+ ax2.draw(renderer)
120
+ fig.canvas.blit(ax1.bbox)
121
+ fig.canvas.blit(ax2.bbox)
122
+
123
+ cid_move = fig.canvas.mpl_connect('motion_notify_event',mouse_move)
124
+ pl.subplots_adjust(left=0.01, bottom=0.01, right=0.99, top=0.99, wspace=0.02, hspace=0.02)
125
+ bb() if bb else pl.show()
126
+ fig.canvas.mpl_disconnect(cid_move)
127
+
128
+
129
+ def closest( grid, event ):
130
+ query = (event.xdata, event.ydata)
131
+ n = np.linalg.norm(grid.reshape(-1,2) - query, axis=1).argmin()
132
+ return np.unravel_index(n, grid.shape[:2])
133
+
134
+
135
+ def local_maxima( arr2d, top=5 ):
136
+ maxpooled = F.max_pool2d( arr2d[None, None], 3, padding=1, stride=1)[0,0]
137
+ local_maxima = (arr2d == maxpooled).nonzero()
138
+ order = arr2d[local_maxima.split(1,dim=1)].ravel().argsort()
139
+ return local_maxima[order[-top:]].T
140
+
141
+
142
+ def fig_num( fig, default, clf=False ):
143
+ if fig == 'last': num = pl.gcf().number
144
+ elif fig: num = fig.number
145
+ else: num = default
146
+ if clf: pl.figure(num).clf()
147
+ return num
148
+
149
+
150
+ def viz_correlation_maps( img1, img2, corr, level=0, fig=None, grid1=None, grid2=None, show_grid=False, bb=bb, **kw ):
151
+ fig, ((ax1, ax2), (ax4, ax3)) = pl.subplots(2, 2, num=fig_num(fig, 'viz_correlation_maps', clf=True))
152
+ img1 = image(img1)
153
+ img2 = image(img2)
154
+ noticks(ax1).imshow( img1 )
155
+ noticks(ax2).imshow( img2 )
156
+ ax4.hist(corr.ravel()[7:7777777:7].cpu().numpy(), bins=50)
157
+
158
+ if isinstance(corr, tuple):
159
+ H1, W1 = corr.grid.shape[:2]
160
+ corr = torch.from_numpy(corr.res_map).view(H1,W1,*corr.res_map.shape[-2:])
161
+
162
+ if grid1 is None:
163
+ s1 = int(0.5 + np.sqrt(img1.size / (3 * corr[...,0,0].numel()))) # scale factor between img1 and corr
164
+ grid1 = nparray(torch.ones_like(corr[:,:,0,0]).nonzero()*s1)[:,1::-1]
165
+ if level == 0: grid1 += s1//2
166
+ if show_grid: plot_grid(grid1, ax1)
167
+ grid1 = nparray(grid1).reshape(*corr[:,:,0,0].shape,2)
168
+
169
+ if grid2 is None:
170
+ s2 = int(0.5 + np.sqrt(img2.size / (3 * corr[0,0,...].numel()))) # scale factor between img2 and corr
171
+ grid2 = nparray(torch.ones_like(corr[0,0]).nonzero()*s2)[:,::-1]
172
+ grid2 = nparray(grid2).reshape(*corr.shape[2:],2)
173
+
174
+ def mouse_move(ev):
175
+ if ev.inaxes is ax1:
176
+ ax3.images.clear()
177
+ n = closest(grid1, ev)
178
+ ax3.imshow(corr[n].cpu().float(), vmin=0, **kw)
179
+
180
+ # find local maxima
181
+ lm = nparray(local_maxima(corr[n]))
182
+ for ax in (ax3, ax2):
183
+ if ax is ax2 and not show_grid:
184
+ ax1.lines.clear()
185
+ ax1.plot(*grid1[n], 'xr', ms=10, scalex=0, scaley=0)
186
+ ax.lines.clear()
187
+ x, y = grid2[y,x].T if ax is ax2 else lm[::-1]
188
+ if ax is not ax3:
189
+ ax.plot(x, y, 'xr', ms=10, scalex=0, scaley=0, label='local maxima')
190
+ print(f"\rCorr channel {n}. Min={corr[n].min():g}, Avg={corr[n].mean():g}, Max={corr[n].max():g} ", end='')
191
+
192
+ mouse_move(FakeEvent(0,0,inaxes=ax1))
193
+ cid_move = fig.canvas.mpl_connect('motion_notify_event', mouse_move)
194
+ pl.subplots_adjust(0,0,1,1,0,0)
195
+ pl.sca(ax4)
196
+ if bb: bb(); fig.canvas.mpl_disconnect(cid_move)
197
+
198
+ def viz_correspondences( img1, img2, corres1, corres2, fig=None ):
199
+ img1, img2 = map(image, (img1, img2))
200
+ fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = pl.subplots(3,2, num=fig_num(fig, 'viz_correspondences'))
201
+ for ax in fig.axes: noticks(ax)
202
+ ax1.imshow( img1 )
203
+ ax2.imshow( img2 )
204
+ ax3.imshow( img1 )
205
+ ax4.imshow( img2 )
206
+ corres1, corres2 = map(cpu, (corres1, corres2))
207
+ plot_grid( corres1[0], ax1, ax2 )
208
+ plot_grid( corres2[0], ax3, ax4 )
209
+
210
+ corres1, corres2 = corres1[1].float(), corres2[1].float()
211
+ ceiling = np.ceil(max(corres1.max(), corres2.max()).item())
212
+ ax5.imshow( corres1, vmin=0, vmax=ceiling )
213
+ ax6.imshow( corres2, vmin=0, vmax=ceiling )
214
+ bb()
215
+
216
+
217
+ class FakeEvent:
218
+ def __init__(self, xdata, ydata, **kw):
219
+ self.xdata = xdata
220
+ self.ydata = ydata
221
+ for name, val in kw.items():
222
+ setattr(self, name, val)
223
+
224
+
225
+ def show_random_pairs( db, pair_idxs=None, **kw ):
226
+ print('Showing random pairs from', db)
227
+
228
+ if pair_idxs is None:
229
+ pair_idxs = np.random.permutation(len(db))
230
+
231
+ for pair_idx in pair_idxs:
232
+ print(f'{pair_idx=}')
233
+ try:
234
+ img1_path, img2_path = map(db.imgs.get_image_path, db.pairs[pair_idx])
235
+ print(f'{img1_path=}\n{img2_path=}')
236
+ if hasattr(db, 'get_corres_path'):
237
+ print(f'corres_path = {db.get_corres_path(pair_idx)}')
238
+ except: pass
239
+ (img1, img2), gt = db[pair_idx]
240
+
241
+ if 'corres' in gt:
242
+ corres = gt['corres']
243
+ else:
244
+ # make corres from homography
245
+ from datasets.utils import corres_from_homography
246
+ corres = corres_from_homography(gt['homography'], *img1.size)
247
+
248
+ show_correspondences(img1, img2, corres, **kw)
249
+
250
+
251
+ if __name__=='__main__':
252
+ import argparse
253
+ import test_singlescale as pump
254
+
255
+ parser = argparse.ArgumentParser('Correspondence visualization')
256
+ parser.add_argument('--img1', required=True, help='path to first image')
257
+ parser.add_argument('--img2', required=True, help='path to second image')
258
+ parser.add_argument('--corres', required=True, help='path to correspondences')
259
+ args = parser.parse_args()
260
+
261
+ corres = np.load(args.corres)['corres']
262
+
263
+ args.resize = 0 # don't resize images
264
+ imgs = tuple(map(image, pump.Main.load_images(args)))
265
+
266
+ show_correspondences(*imgs, corres)
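
Beyond the command-line entry point above, the interactive viewer can also be called directly. A minimal sketch, where the image paths and the random correspondences are placeholders (assuming the repository root is on PYTHONPATH):

    import numpy as np
    from PIL import Image
    from tools.viz import show_correspondences

    img1 = Image.open('img1.jpg').convert('RGB')   # placeholder paths
    img2 = Image.open('img2.jpg').convert('RGB')

    # fake (x1, y1, x2, y2) correspondences, one per row, for illustration only
    corres = np.random.rand(100, 4) * [img1.width, img1.height, img2.width, img2.height]

    show_correspondences(img1, img2, corres, show_grid=True)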
train.py ADDED
@@ -0,0 +1,121 @@
1
+ # Copyright 2022-present NAVER Corp.
2
+ # CC BY-NC-SA 4.0
3
+ # Available only for non-commercial use
4
+
5
+ from pdb import set_trace as bb
6
+ import os
+ import os.path as osp
7
+ import torch
8
+ import torch.optim as optim
9
+ import torchvision.transforms as tvf
10
+
11
+ from tools import common, trainer
12
+ from datasets import *
13
+ from core.conv_mixer import ConvMixer
14
+ from core.losses import *
15
+
16
+
17
+ def parse_args():
18
+ import argparse
19
+ parser = argparse.ArgumentParser("Script to train PUMP")
20
+
21
+ parser.add_argument("--pretrained", type=str, default="", help='pretrained model path')
22
+ parser.add_argument("--save-path", type=str, required=True, help='directory to save model')
23
+
24
+ parser.add_argument("--epochs", type=int, default=50, help='number of training epochs')
25
+ parser.add_argument("--batch-size", "--bs", type=int, default=16, help="batch size")
26
+ parser.add_argument("--learning-rate", "--lr", type=str, default=1e-4)
27
+ parser.add_argument("--weight-decay", "--wd", type=float, default=5e-4)
28
+
29
+ parser.add_argument("--threads", type=int, default=8, help='number of worker threads')
30
+ parser.add_argument("--device", default='cuda')
31
+
32
+ args = parser.parse_args()
33
+ return args
34
+
35
+
36
+ def main( args ):
37
+ device = args.device
38
+ common.mkdir_for(args.save_path)
39
+
40
+ # Create data loader
41
+ db = BalancedCatImagePairs(
42
+ 3125, SyntheticImagePairs(RandomWebImages(0,52),distort='RandomTilting(0.5)'),
43
+ 4875, SyntheticImagePairs(SfM120k_Images(),distort='RandomTilting(0.5)'),
44
+ 8000, SfM120k_Pairs())
45
+
46
+ db = FastPairLoader(db,
47
+ crop=256, transform='RandomRotation(20), RandomScale(256,1536,ar=1.3,can_upscale=True), PixelNoise(25)',
48
+ p_swap=0.5, p_flip=0.5, scale_jitter=0.5)
49
+
50
+ print("Training image database =", db)
51
+ data_loader = torch.utils.data.DataLoader(db, batch_size=args.batch_size, shuffle=True,
52
+ num_workers=args.threads, collate_fn=collate_ordered, pin_memory=False, drop_last=True,
53
+ worker_init_fn=WorkerWithRngInit())
54
+
55
+ # create network
56
+ net = ConvMixer(output_dim=128, hidden_dim=512, depth=7, patch_size=4, kernel_size=9)
57
+ print(f"\n>> Creating {type(net).__name__} net ( Model size: {common.model_size(net)/1e6:.1f}M parameters )")
58
+
59
+ # create losses
60
+ loss = MultiLoss(alpha=0.3,
61
+ loss_sup = PixelAPLoss(nq=20, inner_bw=True, sampler=NghSampler(ngh=7)),
62
+ loss_unsup = DeepMatchingLoss(eps=0.03))
63
+
64
+ # create optimizer
65
+ optimizer = optim.Adam( [p for p in net.parameters() if p.requires_grad],
66
+ lr=args.learning_rate, weight_decay=args.weight_decay)
67
+
68
+ train = MyTrainer(net, loss, optimizer).to(device)
69
+
70
+ # initialization
71
+ final_model_path = osp.join(args.save_path,'model.pt')
72
+ last_model_path = osp.join(args.save_path,'model.pt.last')
73
+ if osp.exists( final_model_path ):
74
+ print('Already trained, nothing to do!')
75
+ return
76
+ elif args.pretrained:
77
+ train.load( args.pretrained )
78
+ elif osp.exists( last_model_path ):
79
+ train.load( last_model_path )
80
+
81
+ train = train.to(args.device)
82
+ if ',' in os.environ.get('CUDA_VISIBLE_DEVICES',''):
83
+ train.distribute()
84
+
85
+ # Training loop #
86
+ while train.epoch < args.epochs:
87
+ # shuffle dataset (select new pairs)
88
+ data_loader.dataset.set_epoch(train.epoch)
89
+
90
+ train(data_loader)
91
+
92
+ train.save(last_model_path)
93
+
94
+ # save final model
95
+ torch.save(train.model.state_dict(), final_model_path)
96
+
97
+
98
+ totensor = tvf.Compose([
99
+ common.ToTensor(),
100
+ tvf.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
101
+ ])
102
+
103
+ class MyTrainer (trainer.Trainer):
104
+ """ This class implements the network training.
105
+ The forward_backward() method below is the one to overload to define how the forward pass and backpropagation are performed.
106
+ """
107
+ def forward_backward(self, inputs):
108
+ assert torch.is_grad_enabled() and self.net.training
109
+
110
+ (img1, img2), labels = inputs
111
+ output1 = self.net(totensor(img1))
112
+ output2 = self.net(totensor(img2))
113
+
114
+ loss, details = trainer.get_loss(self.loss(output1, output2, img1=img1, img2=img2, **labels))
115
+ trainer.backward(loss)
116
+ return details
117
+
118
+
119
+
120
+ if __name__ == '__main__':
121
+ main(parse_args())
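
Training is launched from the command line, e.g. python train.py --save-path output/pump --epochs 50, with the remaining hyper-parameters taking the defaults defined in parse_args(). Once main() has written the final checkpoint, it can be reloaded for inference roughly as follows; a sketch only, where 'output/pump/model.pt' stands for whatever --save-path was used at training time (assuming the repository root is on PYTHONPATH):

    import torch
    from core.conv_mixer import ConvMixer

    # same architecture hyper-parameters as in main()
    net = ConvMixer(output_dim=128, hidden_dim=512, depth=7, patch_size=4, kernel_size=9)
    net.load_state_dict(torch.load('output/pump/model.pt', map_location='cpu'))
    net.eval()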