Spaces: Build error
Upload folder using huggingface_hub (#1)
- Upload folder using huggingface_hub (cb2df8ba8d0f21c17856586ba796d689100b836c)
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
- .gitattributes +2 -0
- LICENSE +201 -0
- MODEL_ZOO.md +13 -0
- VERSION +1 -0
- assets/anime_landscapes.png +3 -0
- assets/network_arch.jpg +0 -0
- assets/teaser.png +3 -0
- assets/test_images/Abandoned Boy Holding a Stuffed Toy Animal. London 1945.jpg +0 -0
- assets/test_images/Acrobats Balance On Top Of The Empire State Building, 1934.jpg +0 -0
- assets/test_images/Ansel Adams _ Moore Photography.jpeg +0 -0
- assets/test_images/Audrey Hepburn.jpg +0 -0
- assets/test_images/Broadway at the United States Hotel Saratoga Springs, N.Y. ca 1900-1915.jpg +0 -0
- assets/test_images/Buffalo Bank Buffalo, New York, circa 1908. Erie County Savings Bank, Niagara Street.jpg +0 -0
- assets/test_images/Detroit circa 1915.jpg +0 -0
- Crafting a Future.jpeg +0 -0
- assets/test_images/February 1936. Nipomo, Calif. Destitute pea pickers living in tent in migrant camp. Mother of seven children. Age 32.jpg +0 -0
- assets/test_images/Helen Keller meeting Charlie Chaplin in 1919.jpg +0 -0
- assets/test_images/Louis Armstrong practicing in his dressing room, ca 1946.jpg +0 -0
- assets/test_images/New York Riverfront December 15, 1931.jpg +0 -0
- assets/test_images/colorized-historical-photos-vintage-photography-39.jpg +0 -0
- basicsr/__init__.py +12 -0
- basicsr/__pycache__/__init__.cpython-310.pyc +0 -0
- basicsr/__pycache__/train.cpython-310.pyc +0 -0
- basicsr/archs/__init__.py +25 -0
- basicsr/archs/__pycache__/__init__.cpython-310.pyc +0 -0
- basicsr/archs/__pycache__/ddcolor_arch.cpython-310.pyc +0 -0
- basicsr/archs/__pycache__/discriminator_arch.cpython-310.pyc +0 -0
- basicsr/archs/__pycache__/vgg_arch.cpython-310.pyc +0 -0
- basicsr/archs/ddcolor_arch.py +385 -0
- basicsr/archs/ddcolor_arch_utils/__int__.py +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/convnext.cpython-310.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/convnext.cpython-38.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/position_encoding.cpython-310.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/position_encoding.cpython-38.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/transformer.cpython-310.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/transformer.cpython-38.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/transformer_utils.cpython-310.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/transformer_utils.cpython-38.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/unet.cpython-310.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/__pycache__/unet.cpython-38.pyc +0 -0
- basicsr/archs/ddcolor_arch_utils/convnext.py +155 -0
- basicsr/archs/ddcolor_arch_utils/position_encoding.py +52 -0
- basicsr/archs/ddcolor_arch_utils/transformer.py +368 -0
- basicsr/archs/ddcolor_arch_utils/transformer_utils.py +192 -0
- basicsr/archs/ddcolor_arch_utils/unet.py +208 -0
- basicsr/archs/ddcolor_arch_utils/util.py +63 -0
- basicsr/archs/discriminator_arch.py +28 -0
- basicsr/archs/vgg_arch.py +165 -0
- basicsr/data/__init__.py +101 -0
- basicsr/data/__pycache__/__init__.cpython-310.pyc +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/anime_landscapes.png filter=lfs diff=lfs merge=lfs -text
+assets/teaser.png filter=lfs diff=lfs merge=lfs -text
LICENSE
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction, and
      distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by the
      copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all other
      entities that control, are controlled by, or are under common control
      with that entity. For the purposes of this definition, "control" means
      (i) the power, direct or indirect, to cause the direction or management
      of such entity, whether by contract or otherwise, or (ii) ownership of
      fifty percent (50%) or more of the outstanding shares, or (iii)
      beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity exercising
      permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation source,
      and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but not
      limited to compiled object code, generated documentation, and
      conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or Object
      form, made available under the License, as indicated by a copyright
      notice that is included in or attached to the work (an example is
      provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including the original
      version of the Work and any modifications or additions to that Work or
      Derivative Works thereof, that is intentionally submitted to Licensor
      for inclusion in the Work by the copyright owner or by an individual or
      Legal Entity authorized to submit on behalf of the copyright owner. For
      the purposes of this definition, "submitted" means any form of
      electronic, verbal, or written communication sent to the Licensor or its
      representatives, including but not limited to communication on
      electronic mailing lists, source code control systems, and issue
      tracking systems that are managed by, or on behalf of, the Licensor for
      the purpose of discussing and improving the Work, but excluding
      communication that is conspicuously marked or otherwise designated in
      writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity on
      behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of this
      License, each Contributor hereby grants to You a perpetual, worldwide,
      non-exclusive, no-charge, royalty-free, irrevocable copyright license to
      reproduce, prepare Derivative Works of, publicly display, publicly
      perform, sublicense, and distribute the Work and such Derivative Works
      in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of this
      License, each Contributor hereby grants to You a perpetual, worldwide,
      non-exclusive, no-charge, royalty-free, irrevocable (except as stated in
      this section) patent license to make, have made, use, offer to sell,
      sell, import, and otherwise transfer the Work, where such license
      applies only to those patent claims licensable by such Contributor that
      are necessarily infringed by their Contribution(s) alone or by
      combination of their Contribution(s) with the Work to which such
      Contribution(s) was submitted. If You institute patent litigation
      against any entity (including a cross-claim or counterclaim in a
      lawsuit) alleging that the Work or a Contribution incorporated within
      the Work constitutes direct or contributory patent infringement, then
      any patent licenses granted to You under this License for that Work
      shall terminate as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the Work or
      Derivative Works thereof in any medium, with or without modifications,
      and in Source or Object form, provided that You meet the following
      conditions:

      (a) You must give any other recipients of the Work or Derivative Works a
          copy of this License; and

      (b) You must cause any modified files to carry prominent notices stating
          that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works that You
          distribute, all copyright, patent, trademark, and attribution
          notices from the Source form of the Work, excluding those notices
          that do not pertain to any part of the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained within
          such NOTICE file, excluding those notices that do not pertain to any
          part of the Derivative Works, in at least one of the following
          places: within a NOTICE text file distributed as part of the
          Derivative Works; within the Source form or documentation, if
          provided along with the Derivative Works; or, within a display
          generated by the Derivative Works, if and wherever such third-party
          notices normally appear. The contents of the NOTICE file are for
          informational purposes only and do not modify the License. You may
          add Your own attribution notices within Derivative Works that You
          distribute, alongside or as an addendum to the NOTICE text from the
          Work, provided that such additional attribution notices cannot be
          construed as modifying the License.

      You may add Your own copyright statement to Your modifications and may
      provide additional or different license terms and conditions for use,
      reproduction, or distribution of Your modifications, or for any such
      Derivative Works as a whole, provided Your use, reproduction, and
      distribution of the Work otherwise complies with the conditions stated
      in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise, any
      Contribution intentionally submitted for inclusion in the Work by You to
      the Licensor shall be under the terms and conditions of this License,
      without any additional terms or conditions. Notwithstanding the above,
      nothing herein shall supersede or modify the terms of any separate
      license agreement you may have executed with Licensor regarding such
      Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or agreed to
      in writing, Licensor provides the Work (and each Contributor provides
      its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
      OF ANY KIND, either express or implied, including, without limitation,
      any warranties or conditions of TITLE, NON-INFRINGEMENT,
      MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely
      responsible for determining the appropriateness of using or
      redistributing the Work and assume any risks associated with Your
      exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory, whether
      in tort (including negligence), contract, or otherwise, unless required
      by applicable law (such as deliberate and grossly negligent acts) or
      agreed to in writing, shall any Contributor be liable to You for
      damages, including any direct, indirect, special, incidental, or
      consequential damages of any character arising as a result of this
      License or out of the use or inability to use the Work (including but
      not limited to damages for loss of goodwill, work stoppage, computer
      failure or malfunction, or any and all other commercial damages or
      losses), even if such Contributor has been advised of the possibility of
      such damages.

   9. Accepting Warranty or Additional Liability. While redistributing the
      Work or Derivative Works thereof, You may choose to offer, and charge a
      fee for, acceptance of support, warranty, indemnity, or other liability
      obligations and/or rights consistent with this License. However, in
      accepting such obligations, You may act only on Your own behalf and on
      Your sole responsibility, not on behalf of any other Contributor, and
      only if You agree to indemnify, defend, and hold each Contributor
      harmless for any liability incurred by, or claims asserted against, such
      Contributor by reason of your accepting any such warranty or additional
      liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]" replaced
      with your own identifying information. (Don't include the brackets!)
      The text should be enclosed in the appropriate comment syntax for the
      file format. We also recommend that a file or class name and description
      of purpose be included on the same "printed page" as the copyright
      notice for easier identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
MODEL_ZOO.md
ADDED
@@ -0,0 +1,13 @@
## DDColor Model Zoo

| Model | Description | Note |
| :--- | :--- | :--- |
| [ddcolor_paper.pth](https://huggingface.co/piddnad/DDColor-models/resolve/main/ddcolor_paper.pth) | DDColor-L trained on ImageNet | Paper model; use it only if you want to reproduce some of the images in the paper. |
| [ddcolor_modelscope.pth](https://huggingface.co/piddnad/DDColor-models/resolve/main/ddcolor_modelscope.pth) (***default***) | DDColor-L trained on ImageNet | We trained this model with the same data-cleaning scheme as [BigColor](https://github.com/KIMGEONUNG/BigColor/issues/2#issuecomment-1196287574), so it gives the best qualitative results with only a slight degradation in FID. Use this model by default if you want to test images outside ImageNet. It can also be downloaded easily through ModelScope [in this way](README.md#inference-with-modelscope-library). |
| [ddcolor_artistic.pth](https://huggingface.co/piddnad/DDColor-models/resolve/main/ddcolor_artistic.pth) | DDColor-L trained on ImageNet + private data | We trained this model on an extended dataset containing many high-quality artistic images, and without the colorfulness loss, so there may be fewer unreasonable color artifacts. Use this model if you want to try different colorization results. |
| [ddcolor_paper_tiny.pth](https://huggingface.co/piddnad/DDColor-models/resolve/main/ddcolor_paper_tiny.pth) | DDColor-T trained on ImageNet | The most lightweight version of the DDColor model, using the same training scheme as ddcolor_paper. |

## Discussions

* About Colorfulness Loss (CL): CL encourages more "colorful" results and helps improve CF scores; however, it sometimes leads to unpleasant color blocks (e.g. red color artifacts). If something goes wrong, I personally recommend trying to remove it during training.
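All of the checkpoints above are hosted in the piddnad/DDColor-models repository on the Hugging Face Hub, so a hedged way to fetch the default weights programmatically is via huggingface_hub. The repo id and filename below come straight from the table; everything else (cache location, how the state dict is later loaded) is an assumption rather than something this upload ships.

# Sketch: download the default DDColor-L checkpoint listed in the table above.
# repo_id/filename come from the MODEL_ZOO links; the rest is illustrative.
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(
    repo_id="piddnad/DDColor-models",
    filename="ddcolor_modelscope.pth",   # the "default" entry in the table
)
print(ckpt_path)                          # local path to the cached .pth file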
VERSION
ADDED
@@ -0,0 +1 @@
1.3.4.6
assets/anime_landscapes.png
ADDED
Git LFS Details
assets/network_arch.jpg
ADDED
assets/teaser.png
ADDED
Git LFS Details
assets/test_images/Abandoned Boy Holding a Stuffed Toy Animal. London 1945.jpg
ADDED
assets/test_images/Acrobats Balance On Top Of The Empire State Building, 1934.jpg
ADDED
assets/test_images/Ansel Adams _ Moore Photography.jpeg
ADDED
assets/test_images/Audrey Hepburn.jpg
ADDED
assets/test_images/Broadway at the United States Hotel Saratoga Springs, N.Y. ca 1900-1915.jpg
ADDED
assets/test_images/Buffalo Bank Buffalo, New York, circa 1908. Erie County Savings Bank, Niagara Street.jpg
ADDED
assets/test_images/Detroit circa 1915.jpg
ADDED
Crafting a Future.jpeg
RENAMED
File without changes
assets/test_images/February 1936. Nipomo, Calif. Destitute pea pickers living in tent in migrant camp. Mother of seven children. Age 32.jpg
ADDED
assets/test_images/Helen Keller meeting Charlie Chaplin in 1919.jpg
ADDED
assets/test_images/Louis Armstrong practicing in his dressing room, ca 1946.jpg
ADDED
assets/test_images/New York Riverfront December 15, 1931.jpg
ADDED
assets/test_images/colorized-historical-photos-vintage-photography-39.jpg
ADDED
basicsr/__init__.py
ADDED
@@ -0,0 +1,12 @@
# https://github.com/xinntao/BasicSR
# flake8: noqa
from .archs import *
from .data import *
from .losses import *
from .metrics import *
from .models import *
# from .ops import *
# from .test import *
from .train import *
from .utils import *
# from .version import __gitsha__, __version__
basicsr/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (256 Bytes).
basicsr/__pycache__/train.cpython-310.pyc
ADDED
Binary file (6.55 kB).
basicsr/archs/__init__.py
ADDED
@@ -0,0 +1,25 @@
import importlib
from copy import deepcopy
from os import path as osp

from basicsr.utils import get_root_logger, scandir
from basicsr.utils.registry import ARCH_REGISTRY

__all__ = ['build_network']

# automatically scan and import arch modules for registry
# scan all the files under the 'archs' folder and collect files ending with
# '_arch.py'
arch_folder = osp.dirname(osp.abspath(__file__))
arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
# import all the arch modules
_arch_modules = [importlib.import_module(f'basicsr.archs.{file_name}') for file_name in arch_filenames]


def build_network(opt):
    opt = deepcopy(opt)
    network_type = opt.pop('type')
    net = ARCH_REGISTRY.get(network_type)(**opt)
    logger = get_root_logger()
    logger.info(f'Network [{net.__class__.__name__}] is created.')
    return net
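build_network() simply pops 'type' and instantiates the matching class from ARCH_REGISTRY, so an option dict for the DDColor architecture registered later in this diff could be passed roughly as below. The concrete values are illustrative assumptions based on DDColor.__init__, not a config file included in this upload.

# Hypothetical usage sketch of build_network(); values mirror DDColor.__init__
# defaults further down in this diff and are only an example.
from basicsr.archs import build_network

opt = {
    'type': 'DDColor',               # registry key created by @ARCH_REGISTRY.register()
    'encoder_name': 'convnext-l',
    'decoder_name': 'MultiScaleColorDecoder',
    'num_queries': 256,
    'num_scales': 3,
    'dec_layers': 9,
}
net = build_network(opt)             # 'type' is popped; the rest become DDColor kwargs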
basicsr/archs/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (1.13 kB).
basicsr/archs/__pycache__/ddcolor_arch.cpython-310.pyc
ADDED
Binary file (10.5 kB).
basicsr/archs/__pycache__/discriminator_arch.cpython-310.pyc
ADDED
Binary file (1.36 kB).
basicsr/archs/__pycache__/vgg_arch.cpython-310.pyc
ADDED
Binary file (4.87 kB).
basicsr/archs/ddcolor_arch.py
ADDED
@@ -0,0 +1,385 @@
import torch
import torch.nn as nn

from basicsr.archs.ddcolor_arch_utils.unet import Hook, CustomPixelShuffle_ICNR, UnetBlockWide, NormType, custom_conv_layer
from basicsr.archs.ddcolor_arch_utils.convnext import ConvNeXt
from basicsr.archs.ddcolor_arch_utils.transformer_utils import SelfAttentionLayer, CrossAttentionLayer, FFNLayer, MLP
from basicsr.archs.ddcolor_arch_utils.position_encoding import PositionEmbeddingSine
from basicsr.archs.ddcolor_arch_utils.transformer import Transformer
from basicsr.utils.registry import ARCH_REGISTRY


@ARCH_REGISTRY.register()
class DDColor(nn.Module):

    def __init__(self,
                 encoder_name='convnext-l',
                 decoder_name='MultiScaleColorDecoder',
                 num_input_channels=3,
                 input_size=(256, 256),
                 nf=512,
                 num_output_channels=3,
                 last_norm='Weight',
                 do_normalize=False,
                 num_queries=256,
                 num_scales=3,
                 dec_layers=9,
                 encoder_from_pretrain=False):
        super().__init__()

        self.encoder = Encoder(encoder_name, ['norm0', 'norm1', 'norm2', 'norm3'], from_pretrain=encoder_from_pretrain)
        self.encoder.eval()
        test_input = torch.randn(1, num_input_channels, *input_size)
        self.encoder(test_input)

        self.decoder = Decoder(
            self.encoder.hooks,
            nf=nf,
            last_norm=last_norm,
            num_queries=num_queries,
            num_scales=num_scales,
            dec_layers=dec_layers,
            decoder_name=decoder_name
        )
        self.refine_net = nn.Sequential(custom_conv_layer(num_queries + 3, num_output_channels, ks=1, use_activ=False, norm_type=NormType.Spectral))

        self.do_normalize = do_normalize
        self.register_buffer('mean', torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer('std', torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def normalize(self, img):
        return (img - self.mean) / self.std

    def denormalize(self, img):
        return img * self.std + self.mean

    def forward(self, x):
        if x.shape[1] == 3:
            x = self.normalize(x)

        self.encoder(x)
        out_feat = self.decoder()
        coarse_input = torch.cat([out_feat, x], dim=1)
        out = self.refine_net(coarse_input)

        if self.do_normalize:
            out = self.denormalize(out)
        return out


class Decoder(nn.Module):

    def __init__(self,
                 hooks,
                 nf=512,
                 blur=True,
                 last_norm='Weight',
                 num_queries=256,
                 num_scales=3,
                 dec_layers=9,
                 decoder_name='MultiScaleColorDecoder'):
        super().__init__()
        self.hooks = hooks
        self.nf = nf
        self.blur = blur
        self.last_norm = getattr(NormType, last_norm)
        self.decoder_name = decoder_name

        self.layers = self.make_layers()
        embed_dim = nf // 2

        self.last_shuf = CustomPixelShuffle_ICNR(embed_dim, embed_dim, blur=self.blur, norm_type=self.last_norm, scale=4)

        if self.decoder_name == 'MultiScaleColorDecoder':
            self.color_decoder = MultiScaleColorDecoder(
                in_channels=[512, 512, 256],
                num_queries=num_queries,
                num_scales=num_scales,
                dec_layers=dec_layers,
            )
        else:
            self.color_decoder = SingleColorDecoder(
                in_channels=hooks[-1].feature.shape[1],
                num_queries=num_queries,
            )

    def forward(self):
        encode_feat = self.hooks[-1].feature
        out0 = self.layers[0](encode_feat)
        out1 = self.layers[1](out0)
        out2 = self.layers[2](out1)
        out3 = self.last_shuf(out2)

        if self.decoder_name == 'MultiScaleColorDecoder':
            out = self.color_decoder([out0, out1, out2], out3)
        else:
            out = self.color_decoder(out3, encode_feat)

        return out

    def make_layers(self):
        decoder_layers = []

        e_in_c = self.hooks[-1].feature.shape[1]
        in_c = e_in_c

        out_c = self.nf
        setup_hooks = self.hooks[-2::-1]
        for layer_index, hook in enumerate(setup_hooks):
            feature_c = hook.feature.shape[1]
            if layer_index == len(setup_hooks) - 1:
                out_c = out_c // 2
            decoder_layers.append(
                UnetBlockWide(
                    in_c, feature_c, out_c, hook, blur=self.blur, self_attention=False, norm_type=NormType.Spectral))
            in_c = out_c
        return nn.Sequential(*decoder_layers)


class Encoder(nn.Module):

    def __init__(self, encoder_name, hook_names, from_pretrain, **kwargs):
        super().__init__()

        if encoder_name == 'convnext-t' or encoder_name == 'convnext':
            self.arch = ConvNeXt()
        elif encoder_name == 'convnext-s':
            self.arch = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768])
        elif encoder_name == 'convnext-b':
            self.arch = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024])
        elif encoder_name == 'convnext-l':
            self.arch = ConvNeXt(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536])
        else:
            raise NotImplementedError

        self.encoder_name = encoder_name
        self.hook_names = hook_names
        self.hooks = self.setup_hooks()

        if from_pretrain:
            self.load_pretrain_model()

    def setup_hooks(self):
        hooks = [Hook(self.arch._modules[name]) for name in self.hook_names]
        return hooks

    def forward(self, x):
        return self.arch(x)

    def load_pretrain_model(self):
        if self.encoder_name == 'convnext-t' or self.encoder_name == 'convnext':
            self.load('pretrain/convnext_tiny_22k_224.pth')
        elif self.encoder_name == 'convnext-s':
            self.load('pretrain/convnext_small_22k_224.pth')
        elif self.encoder_name == 'convnext-b':
            self.load('pretrain/convnext_base_22k_224.pth')
        elif self.encoder_name == 'convnext-l':
            self.load('pretrain/convnext_large_22k_224.pth')
        else:
            raise NotImplementedError
        print('Loaded pretrained convnext model.')

    def load(self, path):
        from basicsr.utils import get_root_logger
        logger = get_root_logger()
        if not path:
            logger.info("No checkpoint found. Initializing model from scratch")
            return
        logger.info("[Encoder] Loading from {} ...".format(path))
        checkpoint = torch.load(path, map_location=torch.device("cpu"))
        checkpoint_state_dict = checkpoint['model'] if 'model' in checkpoint.keys() else checkpoint
        incompatible = self.arch.load_state_dict(checkpoint_state_dict, strict=False)

        if incompatible.missing_keys:
            msg = "Some model parameters or buffers are not found in the checkpoint:\n"
            msg += str(incompatible.missing_keys)
            logger.warning(msg)
        if incompatible.unexpected_keys:
            msg = "The checkpoint state_dict contains keys that are not used by the model:\n"
            msg += str(incompatible.unexpected_keys)
            logger.warning(msg)


class MultiScaleColorDecoder(nn.Module):

    def __init__(
        self,
        in_channels,
        hidden_dim=256,
        num_queries=100,
        nheads=8,
        dim_feedforward=2048,
        dec_layers=9,
        pre_norm=False,
        color_embed_dim=256,
        enforce_input_project=True,
        num_scales=3
    ):
        super().__init__()

        # positional encoding
        N_steps = hidden_dim // 2
        self.pe_layer = PositionEmbeddingSine(N_steps, normalize=True)

        # define Transformer decoder here
        self.num_heads = nheads
        self.num_layers = dec_layers
        self.transformer_self_attention_layers = nn.ModuleList()
        self.transformer_cross_attention_layers = nn.ModuleList()
        self.transformer_ffn_layers = nn.ModuleList()

        for _ in range(self.num_layers):
            self.transformer_self_attention_layers.append(
                SelfAttentionLayer(
                    d_model=hidden_dim,
                    nhead=nheads,
                    dropout=0.0,
                    normalize_before=pre_norm,
                )
            )
            self.transformer_cross_attention_layers.append(
                CrossAttentionLayer(
                    d_model=hidden_dim,
                    nhead=nheads,
                    dropout=0.0,
                    normalize_before=pre_norm,
                )
            )
            self.transformer_ffn_layers.append(
                FFNLayer(
                    d_model=hidden_dim,
                    dim_feedforward=dim_feedforward,
                    dropout=0.0,
                    normalize_before=pre_norm,
                )
            )

        self.decoder_norm = nn.LayerNorm(hidden_dim)

        self.num_queries = num_queries
        # learnable color query features
        self.query_feat = nn.Embedding(num_queries, hidden_dim)
        # learnable color query p.e.
        self.query_embed = nn.Embedding(num_queries, hidden_dim)

        # level embedding
        self.num_feature_levels = num_scales
        self.level_embed = nn.Embedding(self.num_feature_levels, hidden_dim)

        # input projections
        self.input_proj = nn.ModuleList()
        for i in range(self.num_feature_levels):
            if in_channels[i] != hidden_dim or enforce_input_project:
                self.input_proj.append(nn.Conv2d(in_channels[i], hidden_dim, kernel_size=1))
                nn.init.kaiming_uniform_(self.input_proj[-1].weight, a=1)
                if self.input_proj[-1].bias is not None:
                    nn.init.constant_(self.input_proj[-1].bias, 0)
            else:
                self.input_proj.append(nn.Sequential())

        # output FFNs
        self.color_embed = MLP(hidden_dim, hidden_dim, color_embed_dim, 3)

    def forward(self, x, img_features):
        # x is a list of multi-scale feature
        assert len(x) == self.num_feature_levels
        src = []
        pos = []

        for i in range(self.num_feature_levels):
            pos.append(self.pe_layer(x[i], None).flatten(2))
            src.append(self.input_proj[i](x[i]).flatten(2) + self.level_embed.weight[i][None, :, None])

            # flatten NxCxHxW to HWxNxC
            pos[-1] = pos[-1].permute(2, 0, 1)
            src[-1] = src[-1].permute(2, 0, 1)

        _, bs, _ = src[0].shape

        # QxNxC
        query_embed = self.query_embed.weight.unsqueeze(1).repeat(1, bs, 1)
        output = self.query_feat.weight.unsqueeze(1).repeat(1, bs, 1)

        for i in range(self.num_layers):
            level_index = i % self.num_feature_levels
            # attention: cross-attention first
            output = self.transformer_cross_attention_layers[i](
                output, src[level_index],
                memory_mask=None,
                memory_key_padding_mask=None,
                pos=pos[level_index], query_pos=query_embed
            )
            output = self.transformer_self_attention_layers[i](
                output, tgt_mask=None,
                tgt_key_padding_mask=None,
                query_pos=query_embed
            )
            # FFN
            output = self.transformer_ffn_layers[i](
                output
            )

        decoder_output = self.decoder_norm(output)
        decoder_output = decoder_output.transpose(0, 1)  # [N, bs, C] -> [bs, N, C]
        color_embed = self.color_embed(decoder_output)
        out = torch.einsum("bqc,bchw->bqhw", color_embed, img_features)

        return out


class SingleColorDecoder(nn.Module):

    def __init__(
        self,
        in_channels=768,
        hidden_dim=256,
        num_queries=256,  # 100
        nheads=8,
        dropout=0.1,
        dim_feedforward=2048,
        enc_layers=0,
        dec_layers=6,
        pre_norm=False,
        deep_supervision=True,
        enforce_input_project=True,
    ):

        super().__init__()

        N_steps = hidden_dim // 2
        self.pe_layer = PositionEmbeddingSine(N_steps, normalize=True)

        transformer = Transformer(
            d_model=hidden_dim,
            dropout=dropout,
            nhead=nheads,
            dim_feedforward=dim_feedforward,
            num_encoder_layers=enc_layers,
            num_decoder_layers=dec_layers,
            normalize_before=pre_norm,
            return_intermediate_dec=deep_supervision,
        )
        self.num_queries = num_queries
        self.transformer = transformer

        self.query_embed = nn.Embedding(num_queries, hidden_dim)

        if in_channels != hidden_dim or enforce_input_project:
            self.input_proj = nn.Conv2d(in_channels, hidden_dim, kernel_size=1)
            nn.init.kaiming_uniform_(self.input_proj.weight, a=1)
            if self.input_proj.bias is not None:
                nn.init.constant_(self.input_proj.bias, 0)
        else:
            self.input_proj = nn.Sequential()

    def forward(self, img_features, encode_feat):
        pos = self.pe_layer(encode_feat)
        src = encode_feat
        mask = None
        hs, memory = self.transformer(self.input_proj(src), mask, self.query_embed.weight, pos)
        color_embed = hs[-1]
        color_preds = torch.einsum('bqc,bchw->bqhw', color_embed, img_features)
        return color_preds
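As a shape sanity check for the architecture above: the decoder returns a num_queries-channel map at the input resolution, which is concatenated with the 3-channel input and reduced by refine_net to num_output_channels. A minimal smoke-test sketch, assuming the accompanying basicsr package from this upload is importable and using the defaults in this file:

# Rough smoke test of DDColor's forward pass with the defaults defined above.
# This is a sketch, not a script shipped in this upload.
import torch
from basicsr.archs.ddcolor_arch import DDColor

model = DDColor(encoder_name='convnext-l', input_size=(256, 256)).eval()
x = torch.randn(1, 3, 256, 256)       # grayscale input replicated to 3 channels
with torch.no_grad():
    out = model(x)
print(out.shape)                       # expected: (1, num_output_channels, 256, 256)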
basicsr/archs/ddcolor_arch_utils/__int__.py
ADDED
File without changes
basicsr/archs/ddcolor_arch_utils/__pycache__/convnext.cpython-310.pyc
ADDED
Binary file (6.08 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/convnext.cpython-38.pyc
ADDED
Binary file (6.2 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/position_encoding.cpython-310.pyc
ADDED
Binary file (2.03 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/position_encoding.cpython-38.pyc
ADDED
Binary file (2.03 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/transformer.cpython-310.pyc
ADDED
Binary file (8.96 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/transformer.cpython-38.pyc
ADDED
Binary file (8.81 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/transformer_utils.cpython-310.pyc
ADDED
Binary file (6.4 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/transformer_utils.cpython-38.pyc
ADDED
Binary file (6.57 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/unet.cpython-310.pyc
ADDED
Binary file (7.4 kB).
basicsr/archs/ddcolor_arch_utils/__pycache__/unet.cpython-38.pyc
ADDED
Binary file (7.37 kB).
basicsr/archs/ddcolor_arch_utils/convnext.py
ADDED
@@ -0,0 +1,155 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import trunc_normal_, DropPath


class Block(nn.Module):
    r""" ConvNeXt Block. There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch

    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """
    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        x = input + self.drop_path(x)
        return x


class ConvNeXt(nn.Module):
    r""" ConvNeXt
        A PyTorch impl of : `A ConvNet for the 2020s` -
          https://arxiv.org/pdf/2201.03545.pdf
    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """
    def __init__(self, in_chans=3, num_classes=1000,
                 depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
                 layer_scale_init_value=1e-6, head_init_scale=1.,
                 ):
        super().__init__()

        self.downsample_layers = nn.ModuleList()  # stem and 3 intermediate downsampling conv layers
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.Sequential(
                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList()  # 4 feature resolution stages, each consisting of multiple residual blocks
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0
        for i in range(4):
            stage = nn.Sequential(
                *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
                        layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        # add norm layers for each output
        out_indices = (0, 1, 2, 3)
        for i in out_indices:
            layer = LayerNorm(dims[i], eps=1e-6, data_format="channels_first")
            # layer = nn.Identity()
            layer_name = f'norm{i}'
            self.add_module(layer_name, layer)

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
        # self.head_cls = nn.Linear(dims[-1], 4)

        self.apply(self._init_weights)
        # self.head_cls.weight.data.mul_(head_init_scale)
        # self.head_cls.bias.data.mul_(head_init_scale)

    def _init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            nn.init.constant_(m.bias, 0)

    def forward_features(self, x):
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)

            # add extra norm
            norm_layer = getattr(self, f'norm{i}')
            # x = norm_layer(x)
            norm_layer(x)

        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)

    def forward(self, x):
        x = self.forward_features(x)
        # x = self.head_cls(x)
        return x


class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
    with shape (batch_size, channels, height, width).
    """
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape, )

    def forward(self, x):
        if self.data_format == "channels_last":  # B H W C
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":  # B C H W
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x
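The norm0-norm3 modules registered in the loop above are what the Encoder in ddcolor_arch.py attaches its hooks to; forward_features deliberately calls them without reassigning x, so they only matter through those hooks. A small hedged illustration of collecting the multi-scale features with plain forward hooks (shapes assume a 256x256 input and the ConvNeXt-T defaults in this file):

# Sketch: capture the per-stage features that DDColor's Encoder relies on.
import torch
from basicsr.archs.ddcolor_arch_utils.convnext import ConvNeXt

features = {}

def save_output(name):
    def hook(module, inputs, output):
        features[name] = output
    return hook

model = ConvNeXt()  # depths=[3, 3, 9, 3], dims=[96, 192, 384, 768]
for i in range(4):
    getattr(model, f'norm{i}').register_forward_hook(save_output(f'norm{i}'))

_ = model(torch.randn(1, 3, 256, 256))
for name, feat in features.items():
    print(name, tuple(feat.shape))
# norm0 (1, 96, 64, 64), norm1 (1, 192, 32, 32), norm2 (1, 384, 16, 16), norm3 (1, 768, 8, 8)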
basicsr/archs/ddcolor_arch_utils/position_encoding.py
ADDED
@@ -0,0 +1,52 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py
"""
Various positional encodings for the transformer.
"""
import math

import torch
from torch import nn


class PositionEmbeddingSine(nn.Module):
    """
    This is a more standard version of the position embedding, very similar to the one
    used by the Attention is all you need paper, generalized to work on images.
    """

    def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
        super().__init__()
        self.num_pos_feats = num_pos_feats
        self.temperature = temperature
        self.normalize = normalize
        if scale is not None and normalize is False:
            raise ValueError("normalize should be True if scale is passed")
        if scale is None:
            scale = 2 * math.pi
        self.scale = scale

    def forward(self, x, mask=None):
        if mask is None:
            mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool)
        not_mask = ~mask
        y_embed = not_mask.cumsum(1, dtype=torch.float32)
        x_embed = not_mask.cumsum(2, dtype=torch.float32)
        if self.normalize:
            eps = 1e-6
            y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
            x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale

        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
        dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)

        pos_x = x_embed[:, :, :, None] / dim_t
        pos_y = y_embed[:, :, :, None] / dim_t
        pos_x = torch.stack(
            (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos_y = torch.stack(
            (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
        return pos
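For orientation: the sine embedding returns a map with 2 * num_pos_feats channels and the same spatial size as the feature it is computed for, which is why the color decoders above construct it with hidden_dim // 2 steps. A small hedged check:

# Sketch: output has 2 * num_pos_feats channels and mirrors the input's H x W.
import torch
from basicsr.archs.ddcolor_arch_utils.position_encoding import PositionEmbeddingSine

pe = PositionEmbeddingSine(num_pos_feats=128, normalize=True)   # 256 // 2
feat = torch.randn(2, 256, 16, 16)   # (N, C, H, W); C does not affect the output
pos = pe(feat)
print(pos.shape)                      # torch.Size([2, 256, 16, 16])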
basicsr/archs/ddcolor_arch_utils/transformer.py
ADDED
@@ -0,0 +1,368 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified from: https://github.com/facebookresearch/detr/blob/master/models/transformer.py
"""
Transformer class.
Copy-paste from torch.nn.Transformer with modifications:
    * positional encodings are passed in MHattention
    * extra LN at the end of encoder is removed
    * decoder returns a stack of activations from all decoding layers
"""
import copy
from typing import List, Optional

import torch
import torch.nn.functional as F
from torch import Tensor, nn


class Transformer(nn.Module):
    def __init__(
        self,
        d_model=512,
        nhead=8,
        num_encoder_layers=6,
        num_decoder_layers=6,
        dim_feedforward=2048,
        dropout=0.1,
        activation="relu",
        normalize_before=False,
        return_intermediate_dec=False,
    ):
        super().__init__()

        encoder_layer = TransformerEncoderLayer(
            d_model, nhead, dim_feedforward, dropout, activation, normalize_before
        )
        encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        decoder_layer = TransformerDecoderLayer(
            d_model, nhead, dim_feedforward, dropout, activation, normalize_before
        )
        decoder_norm = nn.LayerNorm(d_model)
        self.decoder = TransformerDecoder(
            decoder_layer,
            num_decoder_layers,
            decoder_norm,
            return_intermediate=return_intermediate_dec,
        )

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, src, mask, query_embed, pos_embed):
        # flatten NxCxHxW to HWxNxC
        bs, c, h, w = src.shape
        src = src.flatten(2).permute(2, 0, 1)
        pos_embed = pos_embed.flatten(2).permute(2, 0, 1)
        query_embed = query_embed.unsqueeze(1).repeat(1, bs, 1)
        if mask is not None:
            mask = mask.flatten(1)

        tgt = torch.zeros_like(query_embed)
        memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed)
        hs = self.decoder(
            tgt, memory, memory_key_padding_mask=mask, pos=pos_embed, query_pos=query_embed
        )
        return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w)


class TransformerEncoder(nn.Module):
    def __init__(self, encoder_layer, num_layers, norm=None):
        super().__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def forward(
        self,
        src,
        mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
    ):
        output = src

        for layer in self.layers:
            output = layer(
                output, src_mask=mask, src_key_padding_mask=src_key_padding_mask, pos=pos
            )

        if self.norm is not None:
            output = self.norm(output)

        return output


class TransformerDecoder(nn.Module):
    def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
        super().__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        self.return_intermediate = return_intermediate

    def forward(
        self,
        tgt,
        memory,
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
        query_pos: Optional[Tensor] = None,
    ):
        output = tgt

        intermediate = []

        for layer in self.layers:
            output = layer(
                output,
                memory,
                tgt_mask=tgt_mask,
                memory_mask=memory_mask,
                tgt_key_padding_mask=tgt_key_padding_mask,
                memory_key_padding_mask=memory_key_padding_mask,
                pos=pos,
                query_pos=query_pos,
            )
            if self.return_intermediate:
                intermediate.append(self.norm(output))

        if self.norm is not None:
            output = self.norm(output)
            if self.return_intermediate:
                intermediate.pop()
                intermediate.append(output)

        if self.return_intermediate:
            return torch.stack(intermediate)

        return output.unsqueeze(0)


class TransformerEncoderLayer(nn.Module):
    def __init__(
        self,
        d_model,
        nhead,
        dim_feedforward=2048,
        dropout=0.1,
        activation="relu",
        normalize_before=False,
    ):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(
        self,
        src,
        src_mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
    ):
        q = k = self.with_pos_embed(src, pos)
        src2 = self.self_attn(
            q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
        )[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src

    def forward_pre(
        self,
        src,
        src_mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
    ):
        src2 = self.norm1(src)
        q = k = self.with_pos_embed(src2, pos)
        src2 = self.self_attn(
            q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
        )[0]
        src = src + self.dropout1(src2)
        src2 = self.norm2(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src2))))
        src = src + self.dropout2(src2)
        return src

    def forward(
        self,
        src,
        src_mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
    ):
        if self.normalize_before:
            return self.forward_pre(src, src_mask, src_key_padding_mask, pos)
        return self.forward_post(src, src_mask, src_key_padding_mask, pos)


class TransformerDecoderLayer(nn.Module):
    def __init__(
        self,
        d_model,
        nhead,
        dim_feedforward=2048,
        dropout=0.1,
        activation="relu",
        normalize_before=False,
    ):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(
        self,
        tgt,
        memory,
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
|
269 |
+
query_pos: Optional[Tensor] = None,
|
270 |
+
):
|
271 |
+
q = k = self.with_pos_embed(tgt, query_pos)
|
272 |
+
tgt2 = self.self_attn(
|
273 |
+
q, k, value=tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
|
274 |
+
)[0]
|
275 |
+
tgt = tgt + self.dropout1(tgt2)
|
276 |
+
tgt = self.norm1(tgt)
|
277 |
+
tgt2 = self.multihead_attn(
|
278 |
+
query=self.with_pos_embed(tgt, query_pos),
|
279 |
+
key=self.with_pos_embed(memory, pos),
|
280 |
+
value=memory,
|
281 |
+
attn_mask=memory_mask,
|
282 |
+
key_padding_mask=memory_key_padding_mask,
|
283 |
+
)[0]
|
284 |
+
tgt = tgt + self.dropout2(tgt2)
|
285 |
+
tgt = self.norm2(tgt)
|
286 |
+
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
|
287 |
+
tgt = tgt + self.dropout3(tgt2)
|
288 |
+
tgt = self.norm3(tgt)
|
289 |
+
return tgt
|
290 |
+
|
291 |
+
def forward_pre(
|
292 |
+
self,
|
293 |
+
tgt,
|
294 |
+
memory,
|
295 |
+
tgt_mask: Optional[Tensor] = None,
|
296 |
+
memory_mask: Optional[Tensor] = None,
|
297 |
+
tgt_key_padding_mask: Optional[Tensor] = None,
|
298 |
+
memory_key_padding_mask: Optional[Tensor] = None,
|
299 |
+
pos: Optional[Tensor] = None,
|
300 |
+
query_pos: Optional[Tensor] = None,
|
301 |
+
):
|
302 |
+
tgt2 = self.norm1(tgt)
|
303 |
+
q = k = self.with_pos_embed(tgt2, query_pos)
|
304 |
+
tgt2 = self.self_attn(
|
305 |
+
q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
|
306 |
+
)[0]
|
307 |
+
tgt = tgt + self.dropout1(tgt2)
|
308 |
+
tgt2 = self.norm2(tgt)
|
309 |
+
tgt2 = self.multihead_attn(
|
310 |
+
query=self.with_pos_embed(tgt2, query_pos),
|
311 |
+
key=self.with_pos_embed(memory, pos),
|
312 |
+
value=memory,
|
313 |
+
attn_mask=memory_mask,
|
314 |
+
key_padding_mask=memory_key_padding_mask,
|
315 |
+
)[0]
|
316 |
+
tgt = tgt + self.dropout2(tgt2)
|
317 |
+
tgt2 = self.norm3(tgt)
|
318 |
+
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
|
319 |
+
tgt = tgt + self.dropout3(tgt2)
|
320 |
+
return tgt
|
321 |
+
|
322 |
+
def forward(
|
323 |
+
self,
|
324 |
+
tgt,
|
325 |
+
memory,
|
326 |
+
tgt_mask: Optional[Tensor] = None,
|
327 |
+
memory_mask: Optional[Tensor] = None,
|
328 |
+
tgt_key_padding_mask: Optional[Tensor] = None,
|
329 |
+
memory_key_padding_mask: Optional[Tensor] = None,
|
330 |
+
pos: Optional[Tensor] = None,
|
331 |
+
query_pos: Optional[Tensor] = None,
|
332 |
+
):
|
333 |
+
if self.normalize_before:
|
334 |
+
return self.forward_pre(
|
335 |
+
tgt,
|
336 |
+
memory,
|
337 |
+
tgt_mask,
|
338 |
+
memory_mask,
|
339 |
+
tgt_key_padding_mask,
|
340 |
+
memory_key_padding_mask,
|
341 |
+
pos,
|
342 |
+
query_pos,
|
343 |
+
)
|
344 |
+
return self.forward_post(
|
345 |
+
tgt,
|
346 |
+
memory,
|
347 |
+
tgt_mask,
|
348 |
+
memory_mask,
|
349 |
+
tgt_key_padding_mask,
|
350 |
+
memory_key_padding_mask,
|
351 |
+
pos,
|
352 |
+
query_pos,
|
353 |
+
)
|
354 |
+
|
355 |
+
|
356 |
+
def _get_clones(module, N):
|
357 |
+
return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
|
358 |
+
|
359 |
+
|
360 |
+
def _get_activation_fn(activation):
|
361 |
+
"""Return an activation function given a string"""
|
362 |
+
if activation == "relu":
|
363 |
+
return F.relu
|
364 |
+
if activation == "gelu":
|
365 |
+
return F.gelu
|
366 |
+
if activation == "glu":
|
367 |
+
return F.glu
|
368 |
+
raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
|
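Usage sketch (not part of the uploaded file): how the encoder and decoder classes above could be wired together on dummy tensors. All sizes below are illustrative assumptions, not values taken from the repository.

# Hypothetical smoke test for the classes above; tensors are sequence-first,
# i.e. (sequence_length, batch, d_model), matching nn.MultiheadAttention defaults.
import torch
from torch import nn

d_model, nhead = 256, 8
enc_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward=1024)
encoder = TransformerEncoder(enc_layer, num_layers=3)
dec_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward=1024)
decoder = TransformerDecoder(dec_layer, num_layers=3, norm=nn.LayerNorm(d_model))

src = torch.randn(196, 2, d_model)      # e.g. a flattened 14x14 feature map, batch of 2
queries = torch.zeros(100, 2, d_model)  # decoder targets start from zeros, as in forward() above
memory = encoder(src)
out = decoder(queries, memory)          # shape (1, 100, 2, d_model) since return_intermediate=False
print(out.shape)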
basicsr/archs/ddcolor_arch_utils/transformer_utils.py
ADDED
@@ -0,0 +1,192 @@
from typing import Optional

from torch import nn, Tensor
from torch.nn import functional as F


class SelfAttentionLayer(nn.Module):

    def __init__(self, d_model, nhead, dropout=0.0,
                 activation="relu", normalize_before=False):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(self, tgt,
                     tgt_mask: Optional[Tensor] = None,
                     tgt_key_padding_mask: Optional[Tensor] = None,
                     query_pos: Optional[Tensor] = None):
        q = k = self.with_pos_embed(tgt, query_pos)
        tgt2 = self.self_attn(q, k, value=tgt, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm(tgt)

        return tgt

    def forward_pre(self, tgt,
                    tgt_mask: Optional[Tensor] = None,
                    tgt_key_padding_mask: Optional[Tensor] = None,
                    query_pos: Optional[Tensor] = None):
        tgt2 = self.norm(tgt)
        q = k = self.with_pos_embed(tgt2, query_pos)
        tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout(tgt2)

        return tgt

    def forward(self, tgt,
                tgt_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None,
                query_pos: Optional[Tensor] = None):
        if self.normalize_before:
            return self.forward_pre(tgt, tgt_mask,
                                    tgt_key_padding_mask, query_pos)
        return self.forward_post(tgt, tgt_mask,
                                 tgt_key_padding_mask, query_pos)


class CrossAttentionLayer(nn.Module):

    def __init__(self, d_model, nhead, dropout=0.0,
                 activation="relu", normalize_before=False):
        super().__init__()
        self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(self, tgt, memory,
                     memory_mask: Optional[Tensor] = None,
                     memory_key_padding_mask: Optional[Tensor] = None,
                     pos: Optional[Tensor] = None,
                     query_pos: Optional[Tensor] = None):
        tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos),
                                   key=self.with_pos_embed(memory, pos),
                                   value=memory, attn_mask=memory_mask,
                                   key_padding_mask=memory_key_padding_mask)[0]
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm(tgt)

        return tgt

    def forward_pre(self, tgt, memory,
                    memory_mask: Optional[Tensor] = None,
                    memory_key_padding_mask: Optional[Tensor] = None,
                    pos: Optional[Tensor] = None,
                    query_pos: Optional[Tensor] = None):
        tgt2 = self.norm(tgt)
        tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt2, query_pos),
                                   key=self.with_pos_embed(memory, pos),
                                   value=memory, attn_mask=memory_mask,
                                   key_padding_mask=memory_key_padding_mask)[0]
        tgt = tgt + self.dropout(tgt2)

        return tgt

    def forward(self, tgt, memory,
                memory_mask: Optional[Tensor] = None,
                memory_key_padding_mask: Optional[Tensor] = None,
                pos: Optional[Tensor] = None,
                query_pos: Optional[Tensor] = None):
        if self.normalize_before:
            return self.forward_pre(tgt, memory, memory_mask,
                                    memory_key_padding_mask, pos, query_pos)
        return self.forward_post(tgt, memory, memory_mask,
                                 memory_key_padding_mask, pos, query_pos)


class FFNLayer(nn.Module):

    def __init__(self, d_model, dim_feedforward=2048, dropout=0.0,
                 activation="relu", normalize_before=False):
        super().__init__()
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm = nn.LayerNorm(d_model)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(self, tgt):
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm(tgt)
        return tgt

    def forward_pre(self, tgt):
        tgt2 = self.norm(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
        tgt = tgt + self.dropout(tgt2)
        return tgt

    def forward(self, tgt):
        if self.normalize_before:
            return self.forward_pre(tgt)
        return self.forward_post(tgt)


def _get_activation_fn(activation):
    """Return an activation function given a string"""
    if activation == "relu":
        return F.relu
    if activation == "gelu":
        return F.gelu
    if activation == "glu":
        return F.glu
    raise RuntimeError(f"activation should be relu/gelu, not {activation}.")


class MLP(nn.Module):
    """Very simple multi-layer perceptron (also called FFN)"""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x
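Illustrative sketch (not in the upload): one round of query refinement built from the layers above, i.e. cross-attention against image features, self-attention among the queries, an FFN, and a small MLP head. The tensor sizes are assumptions for demonstration only.

# (num_queries, batch, d_model) queries attend over (H*W, batch, d_model) features.
import torch

d_model, nhead = 256, 8
cross_attn = CrossAttentionLayer(d_model, nhead)
self_attn = SelfAttentionLayer(d_model, nhead)
ffn = FFNLayer(d_model, dim_feedforward=1024)
head = MLP(input_dim=d_model, hidden_dim=d_model, output_dim=3, num_layers=2)

queries = torch.randn(100, 2, d_model)
features = torch.randn(196, 2, d_model)
x = cross_attn(queries, features)
x = self_attn(x)
x = ffn(x)
print(head(x).shape)   # torch.Size([100, 2, 3])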
basicsr/archs/ddcolor_arch_utils/unet.py
ADDED
@@ -0,0 +1,208 @@
from enum import Enum
import torch
import torch.nn as nn
from torch.nn import functional as F
import collections


NormType = Enum('NormType', 'Batch BatchZero Weight Spectral')


class Hook:
    feature = None

    def __init__(self, module):
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        if isinstance(output, torch.Tensor):
            self.feature = output
        elif isinstance(output, collections.OrderedDict):
            self.feature = output['out']

    def remove(self):
        self.hook.remove()


class SelfAttention(nn.Module):
    "Self attention layer for nd."

    def __init__(self, n_channels: int):
        super().__init__()
        self.query = conv1d(n_channels, n_channels // 8)
        self.key = conv1d(n_channels, n_channels // 8)
        self.value = conv1d(n_channels, n_channels)
        self.gamma = nn.Parameter(torch.tensor([0.]))

    def forward(self, x):
        # Notation from https://arxiv.org/pdf/1805.08318.pdf
        size = x.size()
        x = x.view(*size[:2], -1)
        f, g, h = self.query(x), self.key(x), self.value(x)
        beta = F.softmax(torch.bmm(f.permute(0, 2, 1).contiguous(), g), dim=1)
        o = self.gamma * torch.bmm(h, beta) + x
        return o.view(*size).contiguous()


def batchnorm_2d(nf: int, norm_type: NormType = NormType.Batch):
    "A batchnorm2d layer with `nf` features initialized depending on `norm_type`."
    bn = nn.BatchNorm2d(nf)
    with torch.no_grad():
        bn.bias.fill_(1e-3)
        bn.weight.fill_(0. if norm_type == NormType.BatchZero else 1.)
    return bn


def init_default(m: nn.Module, func=nn.init.kaiming_normal_) -> None:
    "Initialize `m` weights with `func` and set `bias` to 0."
    if func:
        if hasattr(m, 'weight'): func(m.weight)
        if hasattr(m, 'bias') and hasattr(m.bias, 'data'): m.bias.data.fill_(0.)
    return m


def icnr(x, scale=2, init=nn.init.kaiming_normal_):
    "ICNR init of `x`, with `scale` and `init` function."
    ni, nf, h, w = x.shape
    ni2 = int(ni / (scale**2))
    k = init(torch.zeros([ni2, nf, h, w])).transpose(0, 1)
    k = k.contiguous().view(ni2, nf, -1)
    k = k.repeat(1, 1, scale**2)
    k = k.contiguous().view([nf, ni, h, w]).transpose(0, 1)
    x.data.copy_(k)


def conv1d(ni: int, no: int, ks: int = 1, stride: int = 1, padding: int = 0, bias: bool = False):
    "Create and initialize a `nn.Conv1d` layer with spectral normalization."
    conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias=bias)
    nn.init.kaiming_normal_(conv.weight)
    if bias: conv.bias.data.zero_()
    return nn.utils.spectral_norm(conv)


def custom_conv_layer(
    ni: int,
    nf: int,
    ks: int = 3,
    stride: int = 1,
    padding: int = None,
    bias: bool = None,
    is_1d: bool = False,
    norm_type=NormType.Batch,
    use_activ: bool = True,
    transpose: bool = False,
    init=nn.init.kaiming_normal_,
    self_attention: bool = False,
    extra_bn: bool = False,
):
    "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers."
    if padding is None:
        padding = (ks - 1) // 2 if not transpose else 0
    bn = norm_type in (NormType.Batch, NormType.BatchZero) or extra_bn == True
    if bias is None:
        bias = not bn
    conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d
    conv = init_default(
        conv_func(ni, nf, kernel_size=ks, bias=bias, stride=stride, padding=padding),
        init,
    )

    if norm_type == NormType.Weight:
        conv = nn.utils.weight_norm(conv)
    elif norm_type == NormType.Spectral:
        conv = nn.utils.spectral_norm(conv)
    layers = [conv]
    if use_activ:
        layers.append(nn.ReLU(True))
    if bn:
        layers.append((nn.BatchNorm1d if is_1d else nn.BatchNorm2d)(nf))
    if self_attention:
        layers.append(SelfAttention(nf))
    return nn.Sequential(*layers)


def conv_layer(ni: int,
               nf: int,
               ks: int = 3,
               stride: int = 1,
               padding: int = None,
               bias: bool = None,
               is_1d: bool = False,
               norm_type=NormType.Batch,
               use_activ: bool = True,
               transpose: bool = False,
               init=nn.init.kaiming_normal_,
               self_attention: bool = False):
    "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers."
    if padding is None: padding = (ks - 1) // 2 if not transpose else 0
    bn = norm_type in (NormType.Batch, NormType.BatchZero)
    if bias is None: bias = not bn
    conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d
    conv = init_default(conv_func(ni, nf, kernel_size=ks, bias=bias, stride=stride, padding=padding), init)
    if norm_type == NormType.Weight: conv = nn.utils.weight_norm(conv)
    elif norm_type == NormType.Spectral: conv = nn.utils.spectral_norm(conv)
    layers = [conv]
    if use_activ: layers.append(nn.ReLU(True))
    if bn: layers.append((nn.BatchNorm1d if is_1d else nn.BatchNorm2d)(nf))
    if self_attention: layers.append(SelfAttention(nf))
    return nn.Sequential(*layers)


def _conv(ni: int, nf: int, ks: int = 3, stride: int = 1, **kwargs):
    return conv_layer(ni, nf, ks=ks, stride=stride, norm_type=NormType.Spectral, **kwargs)


class CustomPixelShuffle_ICNR(nn.Module):
    "Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`."

    def __init__(self,
                 ni: int,
                 nf: int = None,
                 scale: int = 2,
                 blur: bool = True,
                 norm_type=NormType.Spectral,
                 extra_bn=False):
        super().__init__()
        self.conv = custom_conv_layer(
            ni, nf * (scale**2), ks=1, use_activ=False, norm_type=norm_type, extra_bn=extra_bn)
        icnr(self.conv[0].weight)
        self.shuf = nn.PixelShuffle(scale)
        self.do_blur = blur
        # Blurring over (h*w) kernel
        # "Super-Resolution using Convolutional Neural Networks without Any Checkerboard Artifacts"
        # - https://arxiv.org/abs/1806.02658
        self.pad = nn.ReplicationPad2d((1, 0, 1, 0))
        self.blur = nn.AvgPool2d(2, stride=1)
        self.relu = nn.ReLU(True)

    def forward(self, x):
        x = self.shuf(self.relu(self.conv(x)))
        return self.blur(self.pad(x)) if self.do_blur else x


class UnetBlockWide(nn.Module):
    "A quasi-UNet block, using `PixelShuffle_ICNR upsampling`."

    def __init__(self,
                 up_in_c: int,
                 x_in_c: int,
                 n_out: int,
                 hook,
                 blur: bool = False,
                 self_attention: bool = False,
                 norm_type=NormType.Spectral):
        super().__init__()

        self.hook = hook
        up_out = n_out
        self.shuf = CustomPixelShuffle_ICNR(up_in_c, up_out, blur=blur, norm_type=norm_type, extra_bn=True)
        self.bn = batchnorm_2d(x_in_c)
        ni = up_out + x_in_c
        self.conv = custom_conv_layer(ni, n_out, norm_type=norm_type, self_attention=self_attention, extra_bn=True)
        self.relu = nn.ReLU()

    def forward(self, up_in):
        s = self.hook.feature
        up_out = self.shuf(up_in)
        cat_x = self.relu(torch.cat([up_out, self.bn(s)], dim=1))
        return self.conv(cat_x)
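Minimal sketch (illustrative only, not part of the upload): a spectral-norm conv block followed by the pixel-shuffle upsampler defined above; channel counts and image size are assumptions.

# Doubles spatial resolution with CustomPixelShuffle_ICNR after a conv block.
import torch

x = torch.randn(1, 64, 32, 32)
block = custom_conv_layer(64, 128, norm_type=NormType.Spectral, extra_bn=True)
up = CustomPixelShuffle_ICNR(128, 64, scale=2)

y = block(x)            # (1, 128, 32, 32)
z = up(y)               # (1, 64, 64, 64) after PixelShuffle(2) and blur
print(y.shape, z.shape)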
basicsr/archs/ddcolor_arch_utils/util.py
ADDED
@@ -0,0 +1,63 @@
import numpy as np
import torch
from skimage import color


def rgb2lab(img_rgb):
    img_lab = color.rgb2lab(img_rgb)
    return img_lab[:, :, :1], img_lab[:, :, 1:]


def tensor_lab2rgb(labs, illuminant="D65", observer="2"):
    """
    Args:
        labs : Lab tensor of shape (B, 3, H, W)
    Returns:
        Tensor : RGB tensor of shape (B, 3, H, W), clipped to [0, 1]
    """
    illuminants = \
        {"A": {'2': (1.098466069456375, 1, 0.3558228003436005),
               '10': (1.111420406956693, 1, 0.3519978321919493)},
         "D50": {'2': (0.9642119944211994, 1, 0.8251882845188288),
                 '10': (0.9672062750333777, 1, 0.8142801513128616)},
         "D55": {'2': (0.956797052643698, 1, 0.9214805860173273),
                 '10': (0.9579665682254781, 1, 0.9092525159847462)},
         "D65": {'2': (0.95047, 1., 1.08883),  # This was: `lab_ref_white`
                 '10': (0.94809667673716, 1, 1.0730513595166162)},
         "D75": {'2': (0.9497220898840717, 1, 1.226393520724154),
                 '10': (0.9441713925645873, 1, 1.2064272211720228)},
         "E": {'2': (1.0, 1.0, 1.0),
               '10': (1.0, 1.0, 1.0)}}
    xyz_from_rgb = np.array([[0.412453, 0.357580, 0.180423], [0.212671, 0.715160, 0.072169],
                             [0.019334, 0.119193, 0.950227]])

    rgb_from_xyz = np.array([[3.240481340, -0.96925495, 0.055646640], [-1.53715152, 1.875990000, -0.20404134],
                             [-0.49853633, 0.041555930, 1.057311070]])
    B, C, H, W = labs.shape
    arrs = labs.permute((0, 2, 3, 1)).contiguous()  # (B, 3, H, W) -> (B, H, W, 3)
    L, a, b = arrs[:, :, :, 0:1], arrs[:, :, :, 1:2], arrs[:, :, :, 2:]
    y = (L + 16.) / 116.
    x = (a / 500.) + y
    z = y - (b / 200.)
    invalid = z.data < 0
    z[invalid] = 0
    xyz = torch.cat([x, y, z], dim=3)
    mask = xyz.data > 0.2068966
    mask_xyz = xyz.clone()
    mask_xyz[mask] = torch.pow(xyz[mask], 3.0)
    mask_xyz[~mask] = (xyz[~mask] - 16.0 / 116.) / 7.787
    xyz_ref_white = illuminants[illuminant][observer]
    for i in range(C):
        mask_xyz[:, :, :, i] = mask_xyz[:, :, :, i] * xyz_ref_white[i]

    rgb_trans = torch.mm(mask_xyz.view(-1, 3), torch.from_numpy(rgb_from_xyz).type_as(xyz)).view(B, H, W, C)
    rgb = rgb_trans.permute((0, 3, 1, 2)).contiguous()
    mask = rgb.data > 0.0031308
    mask_rgb = rgb.clone()
    mask_rgb[mask] = 1.055 * torch.pow(rgb[mask], 1 / 2.4) - 0.055
    mask_rgb[~mask] = rgb[~mask] * 12.92
    neg_mask = mask_rgb.data < 0
    large_mask = mask_rgb.data > 1
    mask_rgb[neg_mask] = 0
    mask_rgb[large_mask] = 1
    return mask_rgb
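Hedged example (not part of the file): converting an RGB image to Lab with rgb2lab and mapping a Lab tensor back to RGB with tensor_lab2rgb; image size and values are illustrative.

# Round trip: numpy RGB (HWC, [0, 1]) -> L / ab channels -> Lab tensor (B, 3, H, W) -> RGB tensor.
import numpy as np
import torch

img = np.random.rand(64, 64, 3)
img_l, img_ab = rgb2lab(img)                  # (64, 64, 1) and (64, 64, 2)

lab = np.concatenate([img_l, img_ab], axis=2)
lab_tensor = torch.from_numpy(lab).permute(2, 0, 1).unsqueeze(0).float()  # (1, 3, 64, 64)
rgb = tensor_lab2rgb(lab_tensor)              # (1, 3, 64, 64), values in [0, 1]
print(rgb.shape, float(rgb.min()), float(rgb.max()))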
basicsr/archs/discriminator_arch.py
ADDED
@@ -0,0 +1,28 @@
import torch
import torch.nn as nn
from torchvision import models
import numpy as np

from basicsr.archs.ddcolor_arch_utils.unet import _conv
from basicsr.utils.registry import ARCH_REGISTRY


@ARCH_REGISTRY.register()
class DynamicUNetDiscriminator(nn.Module):

    def __init__(self, n_channels: int = 3, nf: int = 256, n_blocks: int = 3):
        super().__init__()
        layers = [_conv(n_channels, nf, ks=4, stride=2)]
        for i in range(n_blocks):
            layers += [
                _conv(nf, nf, ks=3, stride=1),
                _conv(nf, nf * 2, ks=4, stride=2, self_attention=(i == 0)),
            ]
            nf *= 2
        layers += [_conv(nf, nf, ks=3, stride=1), _conv(nf, 1, ks=4, bias=False, padding=0, use_activ=False)]
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        out = self.layers(x)
        out = out.view(out.size(0), -1)
        return out
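Illustrative sketch: the discriminator above downsamples the input and flattens its map of per-position scores. The base width (nf=64) and the 256x256 input are assumptions chosen to keep the example small.

import torch

disc = DynamicUNetDiscriminator(n_channels=3, nf=64, n_blocks=3)
fake_rgb = torch.randn(2, 3, 256, 256)
scores = disc(fake_rgb)
print(scores.shape)   # (2, 169): a 13x13 score map flattened per image for this input size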
basicsr/archs/vgg_arch.py
ADDED
@@ -0,0 +1,165 @@
import os
import torch
from collections import OrderedDict
from torch import nn as nn
from torchvision.models import vgg as vgg

from basicsr.utils.registry import ARCH_REGISTRY

VGG_PRETRAIN_PATH = {
    'vgg19': './pretrain/vgg19-dcbb9e9d.pth',
    'vgg16_bn': './pretrain/vgg16_bn-6c64b313.pth'
}

NAMES = {
    'vgg11': [
        'conv1_1', 'relu1_1', 'pool1', 'conv2_1', 'relu2_1', 'pool2', 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
        'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
        'pool5'
    ],
    'vgg13': [
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'pool5'
    ],
    'vgg16': [
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2',
        'relu4_2', 'conv4_3', 'relu4_3', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
        'pool5'
    ],
    'vgg19': [
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3', 'conv4_1',
        'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4', 'conv5_1', 'relu5_1',
        'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5'
    ]
}


def insert_bn(names):
    """Insert bn layer after each conv.

    Args:
        names (list): The list of layer names.

    Returns:
        list: The list of layer names with bn layers.
    """
    names_bn = []
    for name in names:
        names_bn.append(name)
        if 'conv' in name:
            position = name.replace('conv', '')
            names_bn.append('bn' + position)
    return names_bn


@ARCH_REGISTRY.register()
class VGGFeatureExtractor(nn.Module):
    """VGG network for feature extraction.

    In this implementation, we allow users to choose whether to use
    normalization on the input feature and the type of vgg network. Note that
    the pretrained path must fit the vgg type.

    Args:
        layer_name_list (list[str]): Forward function returns the corresponding
            features according to the layer_name_list.
            Example: {'relu1_1', 'relu2_1', 'relu3_1'}.
        vgg_type (str): Set the type of vgg network. Default: 'vgg19'.
        use_input_norm (bool): If True, normalize the input image. Importantly,
            the input feature must be in the range [0, 1]. Default: True.
        range_norm (bool): If True, norm images with range [-1, 1] to [0, 1].
            Default: False.
        requires_grad (bool): If true, the parameters of VGG network will be
            optimized. Default: False.
        remove_pooling (bool): If true, the max pooling operations in VGG net
            will be removed. Default: False.
        pooling_stride (int): The stride of max pooling operation. Default: 2.
    """

    def __init__(self,
                 layer_name_list,
                 vgg_type='vgg19',
                 use_input_norm=True,
                 range_norm=False,
                 requires_grad=False,
                 remove_pooling=False,
                 pooling_stride=2):
        super(VGGFeatureExtractor, self).__init__()

        self.layer_name_list = layer_name_list
        self.use_input_norm = use_input_norm
        self.range_norm = range_norm

        self.names = NAMES[vgg_type.replace('_bn', '')]
        if 'bn' in vgg_type:
            self.names = insert_bn(self.names)

        # only borrow layers that will be used to avoid unused params
        max_idx = 0
        for v in layer_name_list:
            idx = self.names.index(v)
            if idx > max_idx:
                max_idx = idx

        if os.path.exists(VGG_PRETRAIN_PATH[vgg_type]):
            vgg_net = getattr(vgg, vgg_type)(pretrained=False)
            state_dict = torch.load(VGG_PRETRAIN_PATH[vgg_type], map_location=lambda storage, loc: storage)
            vgg_net.load_state_dict(state_dict)
        else:
            vgg_net = getattr(vgg, vgg_type)(pretrained=True)

        features = vgg_net.features[:max_idx + 1]

        modified_net = OrderedDict()
        for k, v in zip(self.names, features):
            if 'pool' in k:
                # if remove_pooling is true, pooling operation will be removed
                if remove_pooling:
                    continue
                else:
                    # in some cases, we may want to change the default stride
                    modified_net[k] = nn.MaxPool2d(kernel_size=2, stride=pooling_stride)
            else:
                modified_net[k] = v

        self.vgg_net = nn.Sequential(modified_net)

        if not requires_grad:
            self.vgg_net.eval()
            for param in self.parameters():
                param.requires_grad = False
        else:
            self.vgg_net.train()
            for param in self.parameters():
                param.requires_grad = True

        if self.use_input_norm:
            # the mean is for image with range [0, 1]
            self.register_buffer('mean', torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
            # the std is for image with range [0, 1]
            self.register_buffer('std', torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """
        if self.range_norm:
            x = (x + 1) / 2
        if self.use_input_norm:
            x = (x - self.mean) / self.std

        output = {}
        for key, layer in self.vgg_net._modules.items():
            x = layer(x)
            if key in self.layer_name_list:
                output[key] = x.clone()

        return output
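Minimal sketch of pulling perceptual features with the extractor above; the layer list and input size are illustrative, and the constructor falls back to downloading torchvision's pretrained VGG19 weights when no local checkpoint exists under ./pretrain.

import torch

extractor = VGGFeatureExtractor(layer_name_list=['relu1_1', 'relu2_1', 'relu3_1'], vgg_type='vgg19')
x = torch.rand(1, 3, 224, 224)   # RGB in [0, 1], as expected when use_input_norm=True
features = extractor(x)
for name, feat in features.items():
    print(name, feat.shape)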
basicsr/data/__init__.py
ADDED
@@ -0,0 +1,101 @@
import importlib
import numpy as np
import random
import torch
import torch.utils.data
from copy import deepcopy
from functools import partial
from os import path as osp

from basicsr.data.prefetch_dataloader import PrefetchDataLoader
from basicsr.utils import get_root_logger, scandir
from basicsr.utils.dist_util import get_dist_info
from basicsr.utils.registry import DATASET_REGISTRY

__all__ = ['build_dataset', 'build_dataloader']

# automatically scan and import dataset modules for registry
# scan all the files under the data folder with '_dataset' in file names
data_folder = osp.dirname(osp.abspath(__file__))
dataset_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(data_folder) if v.endswith('_dataset.py')]
# import all the dataset modules
_dataset_modules = [importlib.import_module(f'basicsr.data.{file_name}') for file_name in dataset_filenames]


def build_dataset(dataset_opt):
    """Build dataset from options.

    Args:
        dataset_opt (dict): Configuration for dataset. It must contain:
            name (str): Dataset name.
            type (str): Dataset type.
    """
    dataset_opt = deepcopy(dataset_opt)
    dataset = DATASET_REGISTRY.get(dataset_opt['type'])(dataset_opt)
    logger = get_root_logger()
    logger.info(f'Dataset [{dataset.__class__.__name__}] - {dataset_opt["name"]} is built.')
    return dataset


def build_dataloader(dataset, dataset_opt, num_gpu=1, dist=False, sampler=None, seed=None):
    """Build dataloader.

    Args:
        dataset (torch.utils.data.Dataset): Dataset.
        dataset_opt (dict): Dataset options. It contains the following keys:
            phase (str): 'train' or 'val'.
            num_worker_per_gpu (int): Number of workers for each GPU.
            batch_size_per_gpu (int): Training batch size for each GPU.
        num_gpu (int): Number of GPUs. Used only in the train phase.
            Default: 1.
        dist (bool): Whether in distributed training. Used only in the train
            phase. Default: False.
        sampler (torch.utils.data.sampler): Data sampler. Default: None.
        seed (int | None): Seed. Default: None
    """
    phase = dataset_opt['phase']
    rank, _ = get_dist_info()
    if phase == 'train':
        if dist:  # distributed training
            batch_size = dataset_opt['batch_size_per_gpu']
            num_workers = dataset_opt['num_worker_per_gpu']
        else:  # non-distributed training
            multiplier = 1 if num_gpu == 0 else num_gpu
            batch_size = dataset_opt['batch_size_per_gpu'] * multiplier
            num_workers = dataset_opt['num_worker_per_gpu'] * multiplier
        dataloader_args = dict(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            sampler=sampler,
            drop_last=True)
        if sampler is None:
            dataloader_args['shuffle'] = True
        dataloader_args['worker_init_fn'] = partial(
            worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) if seed is not None else None
    elif phase in ['val', 'test']:  # validation
        dataloader_args = dict(dataset=dataset, batch_size=1, shuffle=False, num_workers=0)
    else:
        raise ValueError(f"Wrong dataset phase: {phase}. Supported ones are 'train', 'val' and 'test'.")

    dataloader_args['pin_memory'] = dataset_opt.get('pin_memory', False)
    dataloader_args['persistent_workers'] = dataset_opt.get('persistent_workers', False)

    prefetch_mode = dataset_opt.get('prefetch_mode')
    if prefetch_mode == 'cpu':  # CPUPrefetcher
        num_prefetch_queue = dataset_opt.get('num_prefetch_queue', 1)
        logger = get_root_logger()
        logger.info(f'Use {prefetch_mode} prefetch dataloader: num_prefetch_queue = {num_prefetch_queue}')
        return PrefetchDataLoader(num_prefetch_queue=num_prefetch_queue, **dataloader_args)
    else:
        # prefetch_mode=None: Normal dataloader
        # prefetch_mode='cuda': dataloader for CUDAPrefetcher
        return torch.utils.data.DataLoader(**dataloader_args)


def worker_init_fn(worker_id, num_workers, rank, seed):
    # Set the worker seed to num_workers * rank + worker_id + seed
    worker_seed = num_workers * rank + worker_id + seed
    np.random.seed(worker_seed)
    random.seed(worker_seed)
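Hedged example of the options dict these builders expect; the dataset type name below is a hypothetical placeholder and must match a class registered in DATASET_REGISTRY by one of the *_dataset.py modules scanned above.

dataset_opt = {
    'name': 'toy_train',
    'type': 'SomeRegisteredDataset',   # hypothetical registry key, replace with a real dataset class name
    'phase': 'train',
    'batch_size_per_gpu': 4,
    'num_worker_per_gpu': 2,
}
dataset = build_dataset(dataset_opt)
loader = build_dataloader(dataset, dataset_opt, num_gpu=1, dist=False, seed=0)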
basicsr/data/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (3.56 kB)