Spaces: Running on Zero
mikonvergence committed
Commit: 82f1234 · Parent(s): f7d38e5
github code incorporated
Files changed:
- app.py +1 -1
- src/COP-GEN-Beta +0 -1
- src/COP-GEN-Beta/.gitignore +13 -0
- src/COP-GEN-Beta/README.md +341 -0
- src/COP-GEN-Beta/configs/majortom/discrete/lmdb/rome_dems1s2s2_cop_gen_beta.py +64 -0
- src/COP-GEN-Beta/configs/majortom/discrete/rome_dems1s2s2_cop_gen_beta.py +62 -0
- src/COP-GEN-Beta/create_lmdb.py +213 -0
- src/COP-GEN-Beta/datasets.py +885 -0
- src/COP-GEN-Beta/dpm_solver_pp.py +952 -0
- src/COP-GEN-Beta/encode_majortom_images.py +95 -0
- src/COP-GEN-Beta/libs/__init__.py +1 -0
- src/COP-GEN-Beta/libs/autoencoder.py +519 -0
- src/COP-GEN-Beta/libs/timm.py +112 -0
- src/COP-GEN-Beta/libs/triffuser_multi_post_ln.py +290 -0
- src/COP-GEN-Beta/majortom/NMajorTOM.py +170 -0
- src/COP-GEN-Beta/majortom/coverage_vis.py +149 -0
- src/COP-GEN-Beta/majortom/download_world.py +1009 -0
- src/COP-GEN-Beta/prepare_dataset_images.py +488 -0
- src/COP-GEN-Beta/sample_n_triffuser.py +652 -0
- src/COP-GEN-Beta/scripts/download_rome.sh +18 -0
- src/COP-GEN-Beta/tools/extract_parquet.py +115 -0
- src/COP-GEN-Beta/tools/fid_score.py +260 -0
- src/COP-GEN-Beta/tools/inception.py +328 -0
- src/COP-GEN-Beta/tools/inspect_parquet.py +92 -0
- src/COP-GEN-Beta/tools/print_parquet_urls.py +31 -0
- src/COP-GEN-Beta/train_triffuser_discrete.py +408 -0
- src/COP-GEN-Beta/utils.py +240 -0
app.py
CHANGED
@@ -11,7 +11,7 @@ with gr.Blocks(theme=theme) as demo:
     gr.Markdown("# 🔵 COP-GEN-Beta: Unified Generative Modelling of COPernicus Imagery Thumbnails")
     gr.Markdown("### Miguel Espinosa, Valerio Marsocci, Yuru Jia, Elliot J. Crowley, Mikolaj Czerkawski")
     gr.Markdown('[[Website](https://miquel-espinosa.github.io/cop-gen-beta/)] [[GitHub](https://github.com/miquel-espinosa/COP-GEN-Beta)] [[Model](https://huggingface.co/mespinosami/COP-GEN-Beta)] [[Dataset](https://huggingface.co/Major-TOM)]')
-    gr.Markdown('> ## ⚠️ NOTE: This is a prototype Beta model of COP-GEN. It is based on image thumbnails of Major TOM and does not yet support raw source data. The hillshade visualisation is used for elevation. The full model COP-GEN is coming soon.')
+    gr.Markdown('> ## ⚠️ NOTE: This is a prototype Beta model of COP-GEN. It is based on image thumbnails of [[Major TOM](https://huggingface.co/Major-TOM)] and does not yet support raw source data. The hillshade visualisation is used for elevation. The full model COP-GEN is coming soon.')

     with gr.Column(elem_classes="Main app"):
src/COP-GEN-Beta
DELETED
@@ -1 +0,0 @@
-Subproject commit eef71c50f3a233c30f1e6f8b87d7815494eb1ff2
src/COP-GEN-Beta/.gitignore
ADDED
@@ -0,0 +1,13 @@
data
./data
assets
./assets
workdir
./workdir
__pycache__/
**__pycache__/
*.out
*.pth
out_images/
models/
models
src/COP-GEN-Beta/README.md
ADDED
@@ -0,0 +1,341 @@
# [CVPRW 2025] 🌍 COP-GEN-Beta: Unified Generative Modelling of COPernicus Imagery Thumbnails

[](https://huggingface.co/mespinosami/COP-GEN-Beta)
[](https://github.com/miquel-espinosa/COP-GEN-Beta)
[](https://miquel-espinosa.github.io/cop-gen-beta/)
[](https://huggingface.co/mespinosami/COP-GEN-Beta)
[](https://www.arxiv.org/abs/2504.08548)
<a href="https://colab.research.google.com/github/ESA-PhiLab/Major-TOM/blob/main/03-Filtering-in-Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This repository contains the official implementation of our paper:

[COP-GEN-Beta: Unified Generative Modelling of COPernicus Imagery Thumbnails, *Miguel Espinosa*, *Valerio Marsocci*, *Yuru Jia*, *Elliot J. Crowley*, *Mikolaj Czerkawski*, CVPRW 2025](https://www.arxiv.org/pdf/2504.08548)

### Abstract
> _In remote sensing, multi-modal data from various sensors capturing the same scene offers rich opportunities, but learning a unified representation across these modalities remains a significant challenge. Traditional methods have often been limited to single or dual-modality approaches. In this paper, we introduce COP-GEN-Beta, a generative diffusion model trained on optical, radar, and elevation data from the Major TOM dataset. What sets COP-GEN-Beta apart is its ability to map any subset of modalities to any other, enabling zero-shot modality translation after training. This is achieved through a sequence-based diffusion transformer, where each modality is controlled by its own timestep embedding. We extensively evaluate COP-GEN-Beta on thumbnail images from the Major TOM dataset, demonstrating its effectiveness in generating high-quality samples. Qualitative and quantitative evaluations validate the model's performance, highlighting its potential as a powerful pre-trained model for future remote sensing tasks._

<!-- <details> -->
<!-- <summary><h3><b>Table of Contents</b></h3></summary> -->

### Table of Contents
- [Architecture Overview](#cop-gen-beta-architecture-overview)
- [Training Code Instructions](#training-code-instructions)
  - [0. Basic folder setup](#0-basic-folder-setup)
  - [1. Download training data](#1-download-training-data-subset-example-rome)
  - [2. Patchify and encode thumbnails](#2-patchify-and-encode-thumbnails)
  - [3. Pre-compute features with Stable Diffusion](#3-pre-compute-features-with-stable-diffusion-pretrained-autoencoder)
  - [4. Convert dataset to LMDB (optional)](#4-convert-dataset-to-lmdb-optional)
  - [5. Train the model](#5-train-the-model)
- [Inference Instructions](#cop-gen-beta-inference-instructions)
  - [1. Download model checkpoint](#1-download-model-checkpoint)
  - [2. Run inference on test set](#2-run-inference-on-test-set-rome-subset)
    - [Example 1: Unconditional generation](#example-1-unconditional-generation)
    - [Example 2: Single modality conditioning](#example-2-single-modality-conditioning)
    - [Example 3: 2 modality conditioning](#example-3-2-modality-conditioning)
    - [Example 4: 3 modality conditioning](#example-4-3-modality-conditioning)

<!-- </details> -->
# COP-GEN-Beta: Architecture Overview

We introduce COP-GEN-Beta, a diffusion model designed to handle multiple remote sensing modalities. Specifically, COP-GEN-Beta operates on four key EO modalities: Digital Elevation Model (DEM), Sentinel-1 Radar Terrain Corrected (S1 RTC), Sentinel-2 Level 1C (S2 L1C), and Sentinel-2 Level 2A (S2 L2A). Unlike previous approaches, which require separate models per modality, COP-GEN-Beta learns joint, conditional, and marginal distributions within a unified framework.

This is achieved by (a) sampling a global and dense dataset of these modalities from Major TOM and encoding all images with a pretrained Stable Diffusion autoencoder, and (b) training a sequence-based denoising diffusion model with a transformer backbone, where each modality is supplied with its own designated timestep. This makes it possible to (c) generate all modalities from any subset of them that is available.


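To make the "one timestep per modality" idea concrete, here is a minimal, self-contained sketch of per-modality timestep conditioning in a shared transformer. It is an illustration only, not the repository's `triffuser_multi_post_ln` network; all module and tensor names are assumptions.

```python
import torch
import torch.nn as nn

def timestep_embedding(t, dim):
    # Standard sinusoidal embedding of a (B,) tensor of timesteps into (B, dim).
    half = dim // 2
    freqs = torch.exp(-torch.arange(half, dtype=torch.float32)
                      * torch.log(torch.tensor(10000.0)) / half)
    args = t[:, None].float() * freqs[None]
    return torch.cat([torch.cos(args), torch.sin(args)], dim=-1)

class PerModalityConditioning(nn.Module):
    """Illustrative sketch: each modality gets its own timestep embedding, so a
    modality used as conditioning can, for example, be held at a fixed small
    timestep while the modalities being generated follow the noise schedule."""
    def __init__(self, embed_dim=1024, num_modalities=4):
        super().__init__()
        self.proj = nn.ModuleList(nn.Linear(embed_dim, embed_dim)
                                  for _ in range(num_modalities))
        self.embed_dim = embed_dim

    def forward(self, tokens_per_mod, t_per_mod):
        # tokens_per_mod: list of (B, L, D) token sequences, one per modality
        # t_per_mod:      list of (B,) timesteps, one per modality
        out = []
        for tokens, t, proj in zip(tokens_per_mod, t_per_mod, self.proj):
            emb = proj(timestep_embedding(t, self.embed_dim))   # (B, D)
            out.append(tokens + emb[:, None, :])                # broadcast over sequence
        # The per-modality sequences are then concatenated and fed to one transformer.
        return torch.cat(out, dim=1)
```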
# COP-GEN-Beta: Results

COP-GEN-Beta's flexible sampling capabilities enable a wide range of downstream applications through various modality translation combinations. By allowing generation of any subset of modalities conditioned on any other subset, our model unlocks numerous practical use cases in remote sensing, from atmospheric correction and DEM generation to dataset expansion.


# COP-GEN-Beta: Training code instructions

## 0. Basic folder setup

Data will be stored in `./data/`. Create a symlink if needed.

```bash
ln -s /path/to/disk/with/storage/ ./data
```

Download the Stable Diffusion autoencoder weights:

```bash
mkdir -p ./assets/stable-diffusion
```

Download the weights from [here](https://drive.google.com/drive/folders/1sV-IvcGUrZeIlTmtuKv9vDJiB4JEHL-f) and place them in `./assets/stable-diffusion`.
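If you want to sanity-check the download, here is a minimal sketch; the file name comes from the training configs (`config.autoencoder.pretrained_path`), and loading on CPU only inspects the checkpoint.

```python
import os
import torch

# Path expected by the training configs (config.autoencoder.pretrained_path)
ckpt_path = "assets/stable-diffusion/autoencoder_kl_ema.pth"
assert os.path.isfile(ckpt_path), f"missing {ckpt_path}"

# Load on CPU just to confirm the file is a readable checkpoint
state = torch.load(ckpt_path, map_location="cpu")
print(type(state).__name__, len(state) if hasattr(state, "__len__") else "")
```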
## 1. Download training data (subset example: Rome)

Select a subset of data to download for training. For example, let's download the region of Rome.

```bash
sh scripts/download_rome.sh
```

Run `python3 scripts/download_world.py --help` to see the available options.

The generated folder structure will look like this:
```
data/majorTOM
├── Core-DEM
│   ├── metadata.parquet
├── Core-S1RTC
│   ├── metadata.parquet
├── Core-S2L1C
│   ├── metadata.parquet
├── Core-S2L2A
│   ├── metadata.parquet
├── rome
│   ├── Core-DEM
│   │   ├── metadata.parquet (metadata.parquet for the Rome subset)
│   │   ├── <grid_cell>
│   │   │   ├── ...
│   │   │   ├── compressed.tif
│   │   │   ├── DEM.tif
│   │   │   └── thumbnail.png
│   ├── Core-S1RTC
│   │   ├── metadata.parquet (metadata.parquet for the Rome subset)
│   │   ├── <grid_cell>
│   │   │   ├── ...
│   │   │   ├── vh.tif
│   │   │   ├── vv.tif
│   │   │   ├── thumbnail.png
```
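If you want to peek at what was downloaded, the per-modality `metadata.parquet` files can be inspected with pandas (the repository also ships `tools/inspect_parquet.py`). A minimal sketch; the columns are whatever the Major TOM metadata provides, so this only prints them:

```python
import pandas as pd

# Inspect the Rome subset metadata for one modality (path from the tree above)
df = pd.read_parquet("data/majorTOM/rome/Core-DEM/metadata.parquet")
print(len(df), "rows")
print(df.columns.tolist())
print(df.head())
```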
## 2. Patchify and encode thumbnails

Align image modalities (find common grid cells), patchify into 256x256 patches (thumbnails are 1068x1068; we first crop to 1024x1024, then patchify), and create the train/test splits. A minimal sketch of the patchify step follows the command below.

```bash
python3 prepare_dataset_images.py --subset_path data/majorTOM/rome --output_dir data/majorTOM/rome/rome_thumbnail_png --bands thumbnail
```
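A minimal sketch of the crop-and-patchify logic described above, assuming a centre crop from 1068x1068 to 1024x1024; the actual `prepare_dataset_images.py` may differ in details such as crop placement and file naming.

```python
import numpy as np
from PIL import Image

def patchify_thumbnail(path, crop=1024, patch=256):
    """Centre-crop a 1068x1068 thumbnail to 1024x1024 and tile it into 256x256 patches."""
    img = np.array(Image.open(path).convert("RGB"))
    h, w = img.shape[:2]
    top, left = (h - crop) // 2, (w - crop) // 2
    img = img[top:top + crop, left:left + crop]
    patches = []
    for y in range(0, crop, patch):
        for x in range(0, crop, patch):
            patches.append(img[y:y + patch, x:x + patch])
    return patches  # 16 patches of shape (256, 256, 3)
```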
## 3. Pre-compute features with Stable Diffusion pretrained autoencoder

```bash
bands=(DEM_thumbnail S1RTC_thumbnail S2L1C_thumbnail S2L2A_thumbnail)
splits=(train test)
for band in "${bands[@]}"; do
    for split in "${splits[@]}"; do
        python3 encode_majortom_images.py \
            --path "data/majorTOM/rome/rome_thumbnail_png/${split}/${band}" \
            --resolution 256 \
            --output_dir "data/majorTOM/rome/rome_thumbnail_npy/${split}/${band}"
    done
done
```

Folder structure generated by the command above:
```
data/majorTOM/rome/
├── train
│   ├── DEM_thumbnail
│   │   ├── 0.npy
│   │   ├── 1.npy
│   │   ├── ...
│   ├── S1RTC_thumbnail
│   │   ├── 0.npy
│   │   ├── 1.npy
│   │   ├── ...
│   ├── S2L1C_thumbnail
│   │   ├── 0.npy
│   │   ├── 1.npy
│   │   ├── ...
│   ├── S2L2A_thumbnail
│   │   ├── 0.npy
│   │   ├── 1.npy
│   │   ├── ...
├── test
│   ├── DEM_thumbnail
│   │   ├── 0.npy
│   │   ├── 1.npy
│   │   ├── ...
```
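The saved `.npy` files hold encoder "moments" rather than decoded images. Downstream code (`datasets.py`) reads them back as float32 arrays of shape (8, 32, 32), and the training configs use `z_shape = (4, 32, 32)`, which is consistent with a 4-channel latent mean plus a 4-channel log-variance. A minimal sketch of drawing one latent sample from a saved moment, under that assumption (the file path is illustrative):

```python
import numpy as np

# Assumed layout: channels 0-3 are the latent mean, channels 4-7 the log-variance.
moment = np.load("data/majorTOM/rome/rome_thumbnail_npy/train/S2L2A_thumbnail/0.npy")
mean, logvar = moment[:4], moment[4:]

# Reparameterisation: draw one latent sample z ~ N(mean, exp(logvar))
z = mean + np.exp(0.5 * logvar) * np.random.randn(*mean.shape)
print(moment.shape, z.shape)  # expected: (8, 32, 32) (4, 32, 32)
```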
## 4. Convert dataset to LMDB (optional)

Convert the npy files to an LMDB dataset, for both the train and test splits. (Lower `--batch-size` if the conversion fails.)

```bash
python3 create_lmdb.py \
    --input-img-dir data/majorTOM/rome/rome_thumbnail_npy/train \
    --output-dir data/majorTOM/rome/rome_thumbnail_npy_lmdb/train \
    --input-type npy

python3 create_lmdb.py \
    --input-img-dir data/majorTOM/rome/rome_thumbnail_npy/test \
    --output-dir data/majorTOM/rome/rome_thumbnail_npy_lmdb/test \
    --input-type npy
```
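Each LMDB record is a pickled dictionary mapping the modality folder name to the raw feature bytes (this is how `create_lmdb.py` writes them and how the LMDB dataset class in `datasets.py` reads them). A minimal sketch for inspecting one record of the resulting database; the reshape to (8, 32, 32) mirrors what the training dataset assumes:

```python
import lmdb
import pickle
import numpy as np

env = lmdb.open("data/majorTOM/rome/rome_thumbnail_npy_lmdb/train",
                readonly=True, lock=False)
with env.begin(write=False) as txn:
    key, value = next(iter(txn.cursor()))   # first record
    record = pickle.loads(value)            # {modality: raw feature bytes}
    for modality, raw in record.items():
        features = np.frombuffer(raw, dtype=np.float32).reshape(8, 32, 32)
        print(key.decode(), modality, features.shape)
env.close()
```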
## 5. Train the model

Train the model with the following command (2 GPUs, 4 modalities). Adjust the number of GPUs and the config file as needed.

Notes on the config file:
- Since we are training on a toy dataset, the batch size is set to 8. Feel free to increase it for larger datasets.
- The logging, eval, and save frequencies are set to 2 for faster turnaround. Feel free to increase them for larger datasets.

Visual results, checkpoints, and logs are stored in a generated folder called `workdir`.

### Training with LMDB dataset

```bash
export NUM_GPUS=2
accelerate launch \
    --multi_gpu \
    --num_processes $NUM_GPUS \
    --mixed_precision fp16 \
    train_triffuser_discrete.py \
    --config="configs/majortom/discrete/lmdb/rome_dems1s2s2_cop_gen_beta.py"
```

### Training without LMDB (tuples dataset)

```bash
export NUM_GPUS=2
accelerate launch \
    --multi_gpu \
    --num_processes $NUM_GPUS \
    --mixed_precision fp16 \
    train_triffuser_discrete.py \
    --config="configs/majortom/discrete/rome_dems1s2s2_cop_gen_beta.py"
```
# COP-GEN-Beta: Inference instructions

COP-GEN-Beta is highly versatile at generation time. Given the 4 modalities (DEM, S1RTC, S2L1C, S2L2A), the following generation options exist:

- **Unconditional generation:** Generates tuples of all 4 modalities without any condition.
- **Conditional generation:**
  - **Single modality conditioning:** Generates the missing modalities conditioned on a single modality.
  - **2 modality conditioning:** Generates the missing modalities conditioned on 2 modalities.
  - **3 modality conditioning:** Generates the missing modality conditioned on 3 modalities.

## 1. Download model checkpoint

<!-- To upload the model to Hugging Face Hub just run in the pth folder: -->
<!-- huggingface-cli upload mespinosami/COP-GEN-Beta . -->

Download the model EMA checkpoint from [Hugging Face](https://huggingface.co/mespinosami/COP-GEN-Beta) ([download link](https://huggingface.co/mespinosami/COP-GEN-Beta/resolve/main/nnet_ema_114000.pth)) and place it in the `./models` folder.

This can be done by running:
```bash
mkdir -p models
wget https://huggingface.co/mespinosami/COP-GEN-Beta/resolve/main/nnet_ema_114000.pth -O models/nnet_ema_114000.pth
```

## 2. Run inference on test set (Rome subset)

To see all the available inference options, run `python3 sample_n_triffuser.py --help`. For instance:
- `--n_samples` controls the number of samples to generate for the same input condition (useful for evaluating generation variability),
- `--generate` is the comma-separated list of modalities to generate,
- `--condition` is the comma-separated list of modalities to condition on.
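In practice, `--generate` is usually just the complement of `--condition` over the four modality names used by the sampler. A small illustrative helper (not part of the repository) that builds both flags:

```python
MODALITIES = ["dem", "s1_rtc", "s2_l1c", "s2_l2a"]

def make_flags(condition):
    """Given the modalities to condition on, generate the rest (illustrative helper)."""
    generate = [m for m in MODALITIES if m not in condition]
    flags = []
    if condition:
        flags += ["--condition", ",".join(condition)]
    flags += ["--generate", ",".join(generate)]
    return flags

print(make_flags(["dem", "s1_rtc"]))
# ['--condition', 'dem,s1_rtc', '--generate', 's2_l1c,s2_l2a']
```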
### Example 1: Unconditional generation

Generates all modalities (DEM, S1RTC, S2L2A, S2L1C).
```bash
python3 sample_n_triffuser.py \
    --config configs/majortom/discrete/lmdb/rome_dems1s2s2_cop_gen_beta.py \
    --data_path data/majorTOM/rome/rome_thumbnail_npy_lmdb/test \
    --data_type lmdb \
    --nnet_path models/nnet_ema_114000.pth \
    --n_mod 4 \
    --generate dem,s1_rtc,s2_l2a,s2_l1c \
    --output_path out_images \
    --n_samples 4 \
    --save_as grid
```

### Example 2: Single modality conditioning

Conditioning on S1RTC to generate DEM, S2L2A, and S2L1C.

```bash
python3 sample_n_triffuser.py \
    --config configs/majortom/discrete/lmdb/rome_dems1s2s2_cop_gen_beta.py \
    --data_path data/majorTOM/rome/rome_thumbnail_npy_lmdb/test \
    --data_type lmdb \
    --nnet_path models/nnet_ema_114000.pth \
    --n_mod 4 \
    --condition s1_rtc \
    --generate dem,s2_l2a,s2_l1c \
    --output_path out_images \
    --n_samples 4 \
    --save_as grid
```

### Example 3: 2 modality conditioning

Conditioning on DEM and S1RTC to generate S2L2A and S2L1C.

```bash
python3 sample_n_triffuser.py \
    --config configs/majortom/discrete/lmdb/rome_dems1s2s2_cop_gen_beta.py \
    --data_path data/majorTOM/rome/rome_thumbnail_npy_lmdb/test \
    --data_type lmdb \
    --nnet_path models/nnet_ema_114000.pth \
    --n_mod 4 \
    --condition dem,s1_rtc \
    --generate s2_l2a,s2_l1c \
    --output_path out_images \
    --n_samples 4 \
    --save_as grid
```

### Example 4: 3 modality conditioning

Conditioning on DEM, S1RTC, and S2L2A to generate S2L1C.

```bash
python3 sample_n_triffuser.py \
    --config configs/majortom/discrete/lmdb/rome_dems1s2s2_cop_gen_beta.py \
    --data_path data/majorTOM/rome/rome_thumbnail_npy_lmdb/test \
    --data_type lmdb \
    --nnet_path models/nnet_ema_114000.pth \
    --n_mod 4 \
    --condition dem,s1_rtc,s2_l2a \
    --generate s2_l1c \
    --output_path out_images \
    --n_samples 4 \
    --save_as grid
```
# Citation

If you find this work useful, please cite it as follows:

```bibtex
@inproceedings{espinosa2025copgenbeta,
  title={COP-GEN-Beta: Unified Generative Modelling of COPernicus Imagery Thumbnails},
  author={Espinosa, Miguel and Marsocci, Valerio and Jia, Yuru and Crowley, Elliot J. and Czerkawski, Mikolaj},
  booktitle={CVPRW},
  year={2025}
}
```
src/COP-GEN-Beta/configs/majortom/discrete/lmdb/rome_dems1s2s2_cop_gen_beta.py
ADDED
@@ -0,0 +1,64 @@
import ml_collections


def d(**kwargs):
    """Helper of creating a config dict."""
    return ml_collections.ConfigDict(initial_dictionary=kwargs)


def get_config():
    config = ml_collections.ConfigDict()
    config.seed = 1234
    config.pred = "noise_pred"
    config.z_shape = (4, 32, 32)
    config.autoencoder = d(pretrained_path="assets/stable-diffusion/autoencoder_kl_ema.pth")
    config.train = d(
        n_steps=500000,
        batch_size=8,  # Increase to 512 for larger datasets
        mode="uncond",
        log_interval=2,  # Increase to 100 for larger datasets
        eval_interval=2,  # Increase to 1500 for larger datasets
        save_interval=2,  # Increase to 1500 for larger datasets
        multi_modal=True,
    )
    config.optimizer = d(
        name="adamw",
        lr=0.0002,
        weight_decay=0.03,
        betas=(0.99, 0.99),
    )
    config.lr_scheduler = d(name="customized", warmup_steps=5000)
    config.nnet = d(
        name="triffuser_multi_post_ln",
        img_size=32,
        in_chans=4,
        patch_size=2,
        embed_dim=1024,
        depth=20,
        num_heads=16,
        mlp_ratio=4,
        qkv_bias=False,
        pos_drop_rate=0.,
        drop_rate=0.,
        attn_drop_rate=0.,
        mlp_time_embed=False,
        num_modalities=4,
        use_checkpoint=True,
    )
    config.dataset = d(
        name="majorTOM_lmdb_256_features",
        path="data/majorTOM/rome/rome_thumbnail_npy_lmdb/train",
        # name="majorTOM_tuples_256_features",
        # paths=["data/majorTOM/northern_italy/northern_italy_thumbnail_npy/train/DEM_thumbnail",
        #        "data/majorTOM/northern_italy/northern_italy_thumbnail_npy/train/S1RTC_thumbnail",
        #        "data/majorTOM/northern_italy/northern_italy_thumbnail_npy/train/S2L1C_thumbnail",
        #        "data/majorTOM/northern_italy/northern_italy_thumbnail_npy/train/S2L2A_thumbnail"],
        cfg=False,
        p_uncond=0.1,  # 0.15
    )
    config.sample = d(
        sample_steps=50,
        n_samples=50000,
        mini_batch_size=50,  # the decoder is large
        algorithm="dpm_solver",
        cfg=True,
        scale=0.4,
        path="",
    )
    return config
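For orientation, the config above is a plain `ml_collections.ConfigDict`; it can be loaded and overridden programmatically before being handed to the training or sampling scripts (which consume it via their `--config` flag). A minimal sketch, assuming the working directory is the repository root:

```python
import importlib

# Load the config module by its dotted path (illustrative)
cfg_mod = importlib.import_module(
    "configs.majortom.discrete.lmdb.rome_dems1s2s2_cop_gen_beta")
config = cfg_mod.get_config()

# ConfigDict fields can be read and overridden before training
print(config.train.batch_size, config.nnet.num_modalities)  # 8 4
config.train.batch_size = 64    # e.g. for a larger dataset
config.train.log_interval = 100
```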
src/COP-GEN-Beta/configs/majortom/discrete/rome_dems1s2s2_cop_gen_beta.py
ADDED
@@ -0,0 +1,62 @@
import ml_collections


def d(**kwargs):
    """Helper of creating a config dict."""
    return ml_collections.ConfigDict(initial_dictionary=kwargs)


def get_config():
    config = ml_collections.ConfigDict()
    config.seed = 1234
    config.pred = "noise_pred"
    config.z_shape = (4, 32, 32)
    config.autoencoder = d(pretrained_path="assets/stable-diffusion/autoencoder_kl_ema.pth")
    config.train = d(
        n_steps=500000,
        batch_size=8,  # Increase to 512 for larger datasets
        mode="uncond",
        log_interval=2,  # Increase to 100 for larger datasets
        eval_interval=2,  # Increase to 1500 for larger datasets
        save_interval=2,  # Increase to 1500 for larger datasets
        multi_modal=True,
    )
    config.optimizer = d(
        name="adamw",
        lr=0.0002,
        weight_decay=0.03,
        betas=(0.99, 0.99),
    )
    config.lr_scheduler = d(name="customized", warmup_steps=5000)
    config.nnet = d(
        name="triffuser_multi_post_ln",
        img_size=32,
        in_chans=4,
        patch_size=2,
        embed_dim=1024,
        depth=20,
        num_heads=16,
        mlp_ratio=4,
        qkv_bias=False,
        pos_drop_rate=0.,
        drop_rate=0.,
        attn_drop_rate=0.,
        mlp_time_embed=False,
        num_modalities=4,
        use_checkpoint=True,
    )
    config.dataset = d(
        name="majorTOM_tuples_256_features",
        paths=["data/majorTOM/rome/rome_thumbnail_npy/train/DEM_thumbnail",
               "data/majorTOM/rome/rome_thumbnail_npy/train/S1RTC_thumbnail",
               "data/majorTOM/rome/rome_thumbnail_npy/train/S2L1C_thumbnail",
               "data/majorTOM/rome/rome_thumbnail_npy/train/S2L2A_thumbnail"],
        cfg=False,
        p_uncond=0.1,  # 0.15
    )
    config.sample = d(
        sample_steps=50,
        n_samples=50000,
        mini_batch_size=50,  # the decoder is large
        algorithm="dpm_solver",
        cfg=True,
        scale=0.4,
        path="",
    )
    return config
src/COP-GEN-Beta/create_lmdb.py
ADDED
@@ -0,0 +1,213 @@
"""
Author: Chenhongyi Yang
Reference: GPViT https://github.com/ChenhongyiYang/GPViT
"""

"""
This script will generate a paired LMDB database for all modalities found in the input directory.
Thus, the input directory should contain subdirectories for each modality, each containing a set of images.
The names for the paired images in the different subdirectories should be the same.

# Example:
python3 scripts/create_lmdb.py \
    --input-img-dir data/majorTOM/northern_italy/northern_italy_thumbnail_npy/train \
    --output-dir data/majorTOM/northern_italy/northern_italy_thumbnail_npy_lmdb/train \
    --input-type npy
"""


import glob
import blobfile as bf
import os
import re
import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from typing import Tuple
import pickle

import cv2
import lmdb
import numpy as np

import argparse
parser = argparse.ArgumentParser('Convert LMDB dataset')
parser.add_argument('--input-img-dir', help='Path to ImageNet training images')
parser.add_argument('--output-dir', help='Path to output training lmdb dataset')
parser.add_argument('--input-type', choices=['png', 'npy'],
                    help='Type of input to encode: "png" for PNG images or "npy" for NPY features')
parser.add_argument('--batch-size', type=int, default=10000,
                    help='Batch size for processing images')
# parser.add_argument('val-img-dir', 'Path to ImageNet validation images')
# parser.add_argument('val-out', 'Path to output validation lmdb dataset')
args = parser.parse_args()

_10TB = 10 * (1 << 40)


class LmdbDataExporter(object):
    """
    making LMDB database
    """
    # label_pattern = re.compile(r'/.*/.*?(\d+)$')

    def __init__(self,
                 img_dir=None,
                 output_path=None,
                 batch_size=None):
        """
        img_dir: imgs directory
        output_path: LMDB output path
        """
        self.img_dir = img_dir
        self.output_path = output_path
        self.batch_size = batch_size

        if not os.path.exists(img_dir):
            raise Exception(f'{img_dir} does not exist!')

        if not os.path.exists(output_path):
            os.makedirs(output_path)

        self.lmdb_env = lmdb.open(output_path, map_size=_10TB, max_dbs=4)
        self.modalities = self._get_modalities()

    def _get_modalities(self):
        """Get list of modalities (subdirectories) in the input directory"""
        return [d for d in os.listdir(self.img_dir)
                if os.path.isdir(os.path.join(self.img_dir, d))]

    def export(self):
        idx = 0
        results = []
        st = time.time()
        iter_img_lst = self.read_imgs()
        length = self.get_length()
        print(f'length: {length}')
        while True:
            items = []
            try:
                while len(items) < self.batch_size:
                    items.append(next(iter_img_lst))
            except StopIteration:
                break

            with ThreadPoolExecutor() as executor:
                results.extend(executor.map(self._extract_once, items))

            if len(results) >= self.batch_size:
                self.save_to_lmdb(results)
                idx += self.batch_size
                et = time.time()
                print(f'time: {(et-st)}(s) count: {idx}')
                st = time.time()
                # Progressively decrease batch size for remaining items
                remaining = length - idx
                if remaining < self.batch_size:
                    self.batch_size = max(remaining // 2, 1)
                    print(f'batch_size is reduced to: {self.batch_size}')
                del results[:]

        et = time.time()
        print(f'time: {(et-st)}(s) count: {idx}')
        self.save_to_lmdb(results)
        # self.save_total(idx)
        print('Total length:', len(results))
        del results[:]

    def save_to_lmdb(self, results):
        """
        persist to lmdb
        """
        with self.lmdb_env.begin(write=True) as txn:
            while results:
                img_key, img_byte = results.pop()
                if img_key is None or img_byte is None:
                    continue
                txn.put(img_key, img_byte)

    def save_total(self, total: int):
        """
        persist all numbers of imgs
        """
        with self.lmdb_env.begin(write=True, buffers=True) as txn:
            txn.put('total'.encode(), str(total).encode())

    def _extract_once(self, item) -> Tuple[bytes, bytes]:
        image_name = item[1]
        modality_data = item[2]  # Dictionary of modality -> file path

        # Create a dictionary to store all modality data
        data_dict = {}

        # Read each modality's data
        for modality, file_path in modality_data.items():
            if args.input_type == 'png':  # PNG images: store re-encoded PNG bytes
                img = cv2.imread(file_path)
                if img is None:
                    print(f'{file_path} is a bad img file.')
                    return None, None
                _, img_byte = cv2.imencode('.png', img)
                data_dict[modality] = img_byte.tobytes()
            else:  # npy feature files: store the raw array bytes
                try:
                    features = np.load(file_path)
                    data_dict[modality] = features.tobytes()
                except Exception as e:
                    print(f'Error loading {file_path}: {e}')
                    return None, None

        return (image_name.encode('ascii'), pickle.dumps(data_dict))

    def get_length(self):
        # Just count files in the first modality directory
        if not self.modalities:
            return 0
        first_modality_dir = os.path.join(self.img_dir, self.modalities[0])
        img_list = glob.glob(os.path.join(first_modality_dir, '*.npy'))
        return len(img_list)

    def _list_image_files_recursively(self, data_dir):
        results = []
        for entry in sorted(bf.listdir(data_dir)):
            full_path = bf.join(data_dir, entry)
            ext = entry.split(".")[-1]
            if "." in entry and ext.lower() in ["jpg", "jpeg", "png", "gif", "npy"]:
                results.append(full_path)
            elif bf.isdir(full_path):
                results.extend(self._list_image_files_recursively(full_path))
        return results

    def read_imgs(self):
        # Create a dictionary to store files by their base name
        file_groups = defaultdict(dict)

        # File extension based on input type
        extensions = ['.png'] if args.input_type == 'png' else ['.npy']

        # Collect files from each modality
        for modality in self.modalities:
            modality_path = os.path.join(self.img_dir, modality)
            for file_path in self._list_image_files_recursively(modality_path):
                ext = os.path.splitext(file_path)[1].lower()
                if ext in extensions:
                    base_name = os.path.basename(file_path)
                    file_groups[base_name][modality] = file_path

        # Only yield complete groups
        for idx, (base_name, modality_files) in enumerate(file_groups.items()):
            if len(modality_files) == len(self.modalities):
                item = (idx, base_name, modality_files)
                yield item
            else:
                print(f"Skipping incomplete group {base_name}, found modalities: {list(modality_files.keys())}")


if __name__ == '__main__':
    input_img_dir = args.input_img_dir
    output_dir = args.output_dir

    exporter = LmdbDataExporter(
        input_img_dir,
        output_dir,
        batch_size=args.batch_size)
    exporter.export()
src/COP-GEN-Beta/datasets.py
ADDED
@@ -0,0 +1,885 @@
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from torchvision import datasets
|
3 |
+
import torchvision.transforms as transforms
|
4 |
+
import numpy as np
|
5 |
+
import torch
|
6 |
+
import math
|
7 |
+
import random
|
8 |
+
from PIL import Image
|
9 |
+
import os
|
10 |
+
import glob
|
11 |
+
import einops
|
12 |
+
import torchvision.transforms.functional as F
|
13 |
+
|
14 |
+
|
15 |
+
class UnlabeledDataset(Dataset):
|
16 |
+
def __init__(self, dataset):
|
17 |
+
self.dataset = dataset
|
18 |
+
|
19 |
+
def __len__(self):
|
20 |
+
return len(self.dataset)
|
21 |
+
|
22 |
+
def __getitem__(self, item):
|
23 |
+
# data = tuple(self.dataset[item][:-1]) # remove label
|
24 |
+
data = self.dataset[item]
|
25 |
+
if len(data) == 1:
|
26 |
+
data = data[0]
|
27 |
+
return data
|
28 |
+
|
29 |
+
|
30 |
+
class LabeledDataset(Dataset):
|
31 |
+
def __init__(self, dataset, labels):
|
32 |
+
self.dataset = dataset
|
33 |
+
self.labels = labels
|
34 |
+
|
35 |
+
def __len__(self):
|
36 |
+
return len(self.dataset)
|
37 |
+
|
38 |
+
def __getitem__(self, item):
|
39 |
+
return self.dataset[item], self.labels[item]
|
40 |
+
|
41 |
+
|
42 |
+
class CFGDataset(Dataset): # for classifier free guidance
|
43 |
+
def __init__(self, dataset, p_uncond, empty_token):
|
44 |
+
self.dataset = dataset
|
45 |
+
self.p_uncond = p_uncond
|
46 |
+
self.empty_token = empty_token
|
47 |
+
|
48 |
+
def __len__(self):
|
49 |
+
return len(self.dataset)
|
50 |
+
|
51 |
+
def __getitem__(self, item):
|
52 |
+
x, y = self.dataset[item]
|
53 |
+
if random.random() < self.p_uncond:
|
54 |
+
y = self.empty_token
|
55 |
+
return x, y
|
56 |
+
|
57 |
+
|
58 |
+
class DatasetFactory(object):
|
59 |
+
|
60 |
+
def __init__(self):
|
61 |
+
self.train = None
|
62 |
+
self.test = None
|
63 |
+
|
64 |
+
def get_split(self, split, labeled=False, nosplit=False):
|
65 |
+
if nosplit:
|
66 |
+
return self.dataset
|
67 |
+
if split == "train":
|
68 |
+
dataset = self.train
|
69 |
+
elif split == "test":
|
70 |
+
dataset = self.test
|
71 |
+
else:
|
72 |
+
raise ValueError
|
73 |
+
|
74 |
+
if self.has_label:
|
75 |
+
return dataset if labeled else UnlabeledDataset(dataset)
|
76 |
+
else:
|
77 |
+
assert not labeled
|
78 |
+
return dataset
|
79 |
+
|
80 |
+
def unpreprocess(self, v): # to B C H W and [0, 1]
|
81 |
+
v = 0.5 * (v + 1.)
|
82 |
+
v.clamp_(0., 1.)
|
83 |
+
return v
|
84 |
+
|
85 |
+
@property
|
86 |
+
def has_label(self):
|
87 |
+
return True
|
88 |
+
|
89 |
+
@property
|
90 |
+
def data_shape(self):
|
91 |
+
raise NotImplementedError
|
92 |
+
|
93 |
+
@property
|
94 |
+
def data_dim(self):
|
95 |
+
return int(np.prod(self.data_shape))
|
96 |
+
|
97 |
+
@property
|
98 |
+
def fid_stat(self):
|
99 |
+
return None
|
100 |
+
|
101 |
+
def sample_label(self, n_samples, device):
|
102 |
+
raise NotImplementedError
|
103 |
+
|
104 |
+
def label_prob(self, k):
|
105 |
+
raise NotImplementedError
|
106 |
+
|
107 |
+
|
108 |
+
# CIFAR10
|
109 |
+
|
110 |
+
class CIFAR10(DatasetFactory):
|
111 |
+
r""" CIFAR10 dataset
|
112 |
+
|
113 |
+
Information of the raw dataset:
|
114 |
+
train: 50,000
|
115 |
+
test: 10,000
|
116 |
+
shape: 3 * 32 * 32
|
117 |
+
"""
|
118 |
+
|
119 |
+
def __init__(self, path, random_flip=False, cfg=False, p_uncond=None):
|
120 |
+
super().__init__()
|
121 |
+
|
122 |
+
transform_train = [transforms.ToTensor(), transforms.Normalize(0.5, 0.5)]
|
123 |
+
transform_test = [transforms.ToTensor(), transforms.Normalize(0.5, 0.5)]
|
124 |
+
if random_flip: # only for train
|
125 |
+
transform_train.append(transforms.RandomHorizontalFlip())
|
126 |
+
transform_train = transforms.Compose(transform_train)
|
127 |
+
transform_test = transforms.Compose(transform_test)
|
128 |
+
self.train = datasets.CIFAR10(path, train=True, transform=transform_train, download=True)
|
129 |
+
self.test = datasets.CIFAR10(path, train=False, transform=transform_test, download=True)
|
130 |
+
|
131 |
+
assert len(self.train.targets) == 50000
|
132 |
+
self.K = max(self.train.targets) + 1
|
133 |
+
self.cnt = torch.tensor([len(np.where(np.array(self.train.targets) == k)[0]) for k in range(self.K)]).float()
|
134 |
+
self.frac = [self.cnt[k] / 50000 for k in range(self.K)]
|
135 |
+
print(f'{self.K} classes')
|
136 |
+
print(f'cnt: {self.cnt}')
|
137 |
+
print(f'frac: {self.frac}')
|
138 |
+
|
139 |
+
if cfg: # classifier free guidance
|
140 |
+
assert p_uncond is not None
|
141 |
+
print(f'prepare the dataset for classifier free guidance with p_uncond={p_uncond}')
|
142 |
+
self.train = CFGDataset(self.train, p_uncond, self.K)
|
143 |
+
|
144 |
+
@property
|
145 |
+
def data_shape(self):
|
146 |
+
return 3, 32, 32
|
147 |
+
|
148 |
+
@property
|
149 |
+
def fid_stat(self):
|
150 |
+
return 'assets/fid_stats/fid_stats_cifar10_train_pytorch.npz'
|
151 |
+
|
152 |
+
def sample_label(self, n_samples, device):
|
153 |
+
return torch.multinomial(self.cnt, n_samples, replacement=True).to(device)
|
154 |
+
|
155 |
+
def label_prob(self, k):
|
156 |
+
return self.frac[k]
|
157 |
+
|
158 |
+
|
159 |
+
# ImageNet
|
160 |
+
|
161 |
+
|
162 |
+
class FeatureDataset(Dataset):
|
163 |
+
def __init__(self, path):
|
164 |
+
super().__init__()
|
165 |
+
self.path = path
|
166 |
+
# names = sorted(os.listdir(path))
|
167 |
+
# self.files = [os.path.join(path, name) for name in names]
|
168 |
+
|
169 |
+
def __len__(self):
|
170 |
+
return 1_281_167 * 2 # consider the random flip
|
171 |
+
|
172 |
+
def __getitem__(self, idx):
|
173 |
+
path = os.path.join(self.path, f'{idx}.npy')
|
174 |
+
z, label = np.load(path, allow_pickle=True)
|
175 |
+
return z, label
|
176 |
+
|
177 |
+
|
178 |
+
class MajorTOM_S2_FeatureDataset(Dataset):
|
179 |
+
|
180 |
+
def __init__(self, path, transform=None):
|
181 |
+
super().__init__()
|
182 |
+
self.path = path
|
183 |
+
self.transform = transform
|
184 |
+
# names = sorted(os.listdir(path))
|
185 |
+
# self.files = [os.path.join(path, name) for name in names]
|
186 |
+
|
187 |
+
def __len__(self):
|
188 |
+
return len(glob.glob(f"{self.path}/*.npy"))
|
189 |
+
|
190 |
+
def __getitem__(self, idx):
|
191 |
+
path = os.path.join(self.path, f"{idx}.npy")
|
192 |
+
moment = np.load(path, allow_pickle=True).copy()
|
193 |
+
if self.transform is not None:
|
194 |
+
moment = self.transform(moment)
|
195 |
+
return moment
|
196 |
+
|
197 |
+
|
198 |
+
class MajorTOM_Tuples_FeatureDataset(Dataset):
|
199 |
+
|
200 |
+
def __init__(self, paths, transform=None):
|
201 |
+
super().__init__()
|
202 |
+
self.paths = paths
|
203 |
+
self.transform = transform
|
204 |
+
print(f"Gathering filenames...")
|
205 |
+
self.filenames = [os.path.splitext(os.path.basename(f))[0] for f in glob.glob(f"{self.paths[0]}/*.npy")]
|
206 |
+
print(f"Found {len(self.filenames)} filenames across all paths")
|
207 |
+
|
208 |
+
def __len__(self):
|
209 |
+
return len(self.filenames)
|
210 |
+
|
211 |
+
def __getitem__(self, idx):
|
212 |
+
# Return npy files for each modality. Always in the same order
|
213 |
+
moments = []
|
214 |
+
for path in self.paths:
|
215 |
+
path = os.path.join(path, f"{self.filenames[idx]}.npy")
|
216 |
+
moment = np.load(path, allow_pickle=True).copy()
|
217 |
+
if self.transform is not None:
|
218 |
+
moment = self.transform(moment)
|
219 |
+
moments.append(moment)
|
220 |
+
return moments
|
221 |
+
|
222 |
+
|
223 |
+
class MajorTOM_S2_Features(DatasetFactory): # the moments calculated by Stable Diffusion image encoder
|
224 |
+
def __init__(self, path, cfg=False, p_uncond=None):
|
225 |
+
super().__init__()
|
226 |
+
print("Prepare dataset...")
|
227 |
+
# transform_train = [transforms.ToTensor()]
|
228 |
+
transform_train = []
|
229 |
+
self.train = MajorTOM_S2_FeatureDataset(
|
230 |
+
path, transform=transforms.Compose(transform_train)
|
231 |
+
)
|
232 |
+
self.path = path
|
233 |
+
print("Prepare dataset ok")
|
234 |
+
self.K = 1000
|
235 |
+
|
236 |
+
if cfg: # classifier free guidance
|
237 |
+
assert p_uncond is not None
|
238 |
+
print(f"prepare the dataset for classifier free guidance with p_uncond={p_uncond}")
|
239 |
+
self.train = CFGDataset(self.train, p_uncond, self.K)
|
240 |
+
|
241 |
+
def get_split(self, split, labeled=False):
|
242 |
+
if split == "train":
|
243 |
+
dataset = self.train
|
244 |
+
elif split == "test":
|
245 |
+
dataset = self.test
|
246 |
+
else:
|
247 |
+
raise ValueError
|
248 |
+
|
249 |
+
if self.has_label:
|
250 |
+
return dataset if labeled else UnlabeledDataset(dataset)
|
251 |
+
else:
|
252 |
+
assert not labeled
|
253 |
+
return dataset
|
254 |
+
|
255 |
+
@property
|
256 |
+
def data_shape(self):
|
257 |
+
return 4, 133, 133
|
258 |
+
|
259 |
+
@property
|
260 |
+
def fid_stat(self):
|
261 |
+
return f"assets/fid_stats/fid_stats_imagenet256_guided_diffusion.npz"
|
262 |
+
|
263 |
+
def sample_label(self, n_samples, device):
|
264 |
+
return torch.randint(0, 1000, (n_samples,), device=device)
|
265 |
+
|
266 |
+
|
267 |
+
class MajorTOM_Tuples_Features(DatasetFactory): # the moments calculated by Stable Diffusion image encoder
|
268 |
+
def __init__(self, paths, cfg=False, p_uncond=None):
|
269 |
+
super().__init__()
|
270 |
+
print("Prepare dataset...")
|
271 |
+
# transform_train = [transforms.ToTensor()]
|
272 |
+
transform_train = []
|
273 |
+
self.train = MajorTOM_Tuples_FeatureDataset(
|
274 |
+
paths, transform=transforms.Compose(transform_train)
|
275 |
+
)
|
276 |
+
self.paths = paths
|
277 |
+
print("Prepare dataset ok")
|
278 |
+
self.K = 1000
|
279 |
+
|
280 |
+
if cfg: # classifier free guidance
|
281 |
+
assert p_uncond is not None
|
282 |
+
print(f"prepare the dataset for classifier free guidance with p_uncond={p_uncond}")
|
283 |
+
self.train = CFGDataset(self.train, p_uncond, self.K)
|
284 |
+
|
285 |
+
def get_split(self, split, labeled=False):
|
286 |
+
if split == "train":
|
287 |
+
dataset = self.train
|
288 |
+
elif split == "test":
|
289 |
+
dataset = self.test
|
290 |
+
else:
|
291 |
+
raise ValueError
|
292 |
+
|
293 |
+
if self.has_label:
|
294 |
+
return dataset if labeled else UnlabeledDataset(dataset)
|
295 |
+
else:
|
296 |
+
assert not labeled
|
297 |
+
return dataset
|
298 |
+
|
299 |
+
@property
|
300 |
+
def data_shape(self):
|
301 |
+
return "blablabla"
|
302 |
+
|
303 |
+
@property
|
304 |
+
def fid_stat(self):
|
305 |
+
return f"assets/fid_stats/fid_stats_imagenet256_guided_diffusion.npz"
|
306 |
+
|
307 |
+
def sample_label(self, n_samples, device):
|
308 |
+
raise NotImplementedError
|
309 |
+
return torch.randint(0, 1000, (n_samples,), device=device)
|
310 |
+
|
311 |
+
|
312 |
+
class MajorTOM_Lmdb_FeatureDataset(Dataset):
|
313 |
+
def __init__(self, path, transform=None, return_filename=False):
|
314 |
+
super().__init__()
|
315 |
+
import pickle
|
316 |
+
|
317 |
+
self.transform = transform
|
318 |
+
self.path = path # Store the path instead of the environment
|
319 |
+
self.return_filename = return_filename
|
320 |
+
|
321 |
+
# Create a temporary environment just to get the stats and keys
|
322 |
+
import lmdb
|
323 |
+
env = lmdb.open(
|
324 |
+
path,
|
325 |
+
max_readers=1,
|
326 |
+
readonly=True,
|
327 |
+
lock=False,
|
328 |
+
readahead=False,
|
329 |
+
meminit=False,
|
330 |
+
)
|
331 |
+
|
332 |
+
# Get total number of entries
|
333 |
+
with env.begin(write=False) as txn:
|
334 |
+
self.length = txn.stat()["entries"]
|
335 |
+
|
336 |
+
# Load or create cache of keys
|
337 |
+
root_split = path.split("/")
|
338 |
+
cache_file = os.path.join("/".join(root_split[:-1]), f"_cache_{root_split[-1]}")
|
339 |
+
if os.path.isfile(cache_file):
|
340 |
+
self.keys = pickle.load(open(cache_file, "rb"))
|
341 |
+
else:
|
342 |
+
with env.begin(write=False) as txn:
|
343 |
+
self.keys = [key for key, _ in txn.cursor()]
|
344 |
+
pickle.dump(self.keys, open(cache_file, "wb"))
|
345 |
+
|
346 |
+
# Close the temporary environment
|
347 |
+
env.close()
|
348 |
+
|
349 |
+
# Create environment lazily in each worker
|
350 |
+
self._env = None
|
351 |
+
|
352 |
+
def _init_db(self):
|
353 |
+
"""Initialize LMDB environment"""
|
354 |
+
import lmdb
|
355 |
+
self._env = lmdb.open(
|
356 |
+
self.path,
|
357 |
+
max_readers=1,
|
358 |
+
readonly=True,
|
359 |
+
lock=False,
|
360 |
+
readahead=False,
|
361 |
+
meminit=False,
|
362 |
+
)
|
363 |
+
|
364 |
+
@property
|
365 |
+
def env(self):
|
366 |
+
"""Get LMDB environment, creating it if necessary"""
|
367 |
+
if self._env is None:
|
368 |
+
self._init_db()
|
369 |
+
return self._env
|
370 |
+
|
371 |
+
def __len__(self):
|
372 |
+
return self.length
|
373 |
+
|
374 |
+
def __getitem__(self, idx):
|
375 |
+
# Get data from LMDB
|
376 |
+
import pickle
|
377 |
+
import numpy as np
|
378 |
+
|
379 |
+
key = self.keys[idx]
|
380 |
+
filename = key.decode('utf-8') if isinstance(key, bytes) else key
|
381 |
+
filename = os.path.basename(filename) # get filename without path
|
382 |
+
filename = os.path.splitext(filename)[0] # remove .npy extension
|
383 |
+
|
384 |
+
with self.env.begin(write=False) as txn:
|
385 |
+
data = pickle.loads(txn.get(key))
|
386 |
+
|
387 |
+
# Convert bytes to data for each modality
|
388 |
+
decoded_data = {}
|
389 |
+
for k, bytes_data in data.items():
|
390 |
+
# Convert bytes back to numpy array with the expected shape (8, 32, 32).
|
391 |
+
# TODO: This is currently hardcoded.
|
392 |
+
features = np.frombuffer(bytes_data, dtype=np.float32).reshape(8, 32, 32).copy()
|
393 |
+
decoded_data[k] = features
|
394 |
+
|
395 |
+
# Apply transforms if any
|
396 |
+
if self.transform is not None:
|
397 |
+
decoded_data = {k: self.transform(v) for k, v in decoded_data.items()}
|
398 |
+
|
399 |
+
# Convert the dictionary values to a list in a consistent order
|
400 |
+
moments = [decoded_data[k] for k in sorted(decoded_data.keys())]
|
401 |
+
|
402 |
+
if self.return_filename:
|
403 |
+
return moments, filename
|
404 |
+
return moments
|
405 |
+
|
406 |
+
def __del__(self):
|
407 |
+
if self._env is not None:
|
408 |
+
self._env.close()
|
409 |
+
|
410 |
+
|
411 |
+
class MajorTOM_Lmdb_Features(DatasetFactory):
|
412 |
+
def __init__(self, path, cfg=False, p_uncond=None, return_filename=False):
|
413 |
+
super().__init__()
|
414 |
+
print("Prepare dataset...")
|
415 |
+
transform_train = []
|
416 |
+
self.return_filename = return_filename
|
417 |
+
self.train = MajorTOM_Lmdb_FeatureDataset(
|
418 |
+
path, transform=transforms.Compose(transform_train), return_filename=return_filename
|
419 |
+
)
|
420 |
+
self.path = path
|
421 |
+
print("Prepare dataset ok")
|
422 |
+
self.K = 1000
|
423 |
+
|
424 |
+
if cfg: # classifier free guidance
|
425 |
+
assert p_uncond is not None
|
426 |
+
print(f"prepare the dataset for classifier free guidance with p_uncond={p_uncond}")
|
427 |
+
self.train = CFGDataset(self.train, p_uncond, self.K)
|
428 |
+
|
429 |
+
def get_split(self, split, labeled=False):
|
430 |
+
if split == "train":
|
431 |
+
dataset = self.train
|
432 |
+
elif split == "test":
|
433 |
+
dataset = self.test
|
434 |
+
else:
|
435 |
+
raise ValueError
|
436 |
+
|
437 |
+
if self.has_label:
|
438 |
+
return dataset if labeled else UnlabeledDataset(dataset)
|
439 |
+
else:
|
440 |
+
assert not labeled
|
441 |
+
return dataset
|
442 |
+
|
443 |
+
@property
|
444 |
+
def data_shape(self):
|
445 |
+
return "blablabla"
|
446 |
+
|
447 |
+
@property
|
448 |
+
def fid_stat(self):
|
449 |
+
return f"assets/fid_stats/fid_stats_imagenet256_guided_diffusion.npz"
|
450 |
+
|
451 |
+
def sample_label(self, n_samples, device):
|
452 |
+
raise NotImplementedError
|
453 |
+
return torch.randint(0, 1000, (n_samples,), device=device)
|
454 |
+
|
455 |
+
|
456 |
+
class ImageNet256Features(DatasetFactory): # the moments calculated by Stable Diffusion image encoder
|
457 |
+
def __init__(self, path, cfg=False, p_uncond=None):
|
458 |
+
super().__init__()
|
459 |
+
print('Prepare dataset...')
|
460 |
+
self.train = FeatureDataset(path)
|
461 |
+
print('Prepare dataset ok')
|
462 |
+
self.K = 1000
|
463 |
+
|
464 |
+
if cfg: # classifier free guidance
|
465 |
+
assert p_uncond is not None
|
466 |
+
print(f'prepare the dataset for classifier free guidance with p_uncond={p_uncond}')
|
467 |
+
self.train = CFGDataset(self.train, p_uncond, self.K)
|
468 |
+
|
469 |
+
@property
|
470 |
+
def data_shape(self):
|
471 |
+
return 4, 32, 32
|
472 |
+
|
473 |
+
@property
|
474 |
+
def fid_stat(self):
|
475 |
+
return f'assets/fid_stats/fid_stats_imagenet256_guided_diffusion.npz'
|
476 |
+
|
477 |
+
def sample_label(self, n_samples, device):
|
478 |
+
return torch.randint(0, 1000, (n_samples,), device=device)
|
479 |
+
|
480 |
+
|
481 |
+
class ImageNet512Features(DatasetFactory): # the moments calculated by Stable Diffusion image encoder
|
482 |
+
def __init__(self, path, cfg=False, p_uncond=None):
|
483 |
+
super().__init__()
|
484 |
+
print('Prepare dataset...')
|
485 |
+
self.train = FeatureDataset(path)
|
486 |
+
print('Prepare dataset ok')
|
487 |
+
self.K = 1000
|
488 |
+
|
489 |
+
if cfg: # classifier free guidance
|
490 |
+
assert p_uncond is not None
|
491 |
+
print(f'prepare the dataset for classifier free guidance with p_uncond={p_uncond}')
|
492 |
+
self.train = CFGDataset(self.train, p_uncond, self.K)
|
493 |
+
|
494 |
+
@property
|
495 |
+
def data_shape(self):
|
496 |
+
return 4, 64, 64
|
497 |
+
|
498 |
+
@property
|
499 |
+
def fid_stat(self):
|
500 |
+
return f'assets/fid_stats/fid_stats_imagenet512_guided_diffusion.npz'
|
501 |
+
|
502 |
+
def sample_label(self, n_samples, device):
|
503 |
+
return torch.randint(0, 1000, (n_samples,), device=device)
|
504 |
+
|
505 |
+
|
506 |
+
class ImageNet(DatasetFactory):
|
507 |
+
def __init__(self, path, resolution, random_crop=False, random_flip=True):
|
508 |
+
super().__init__()
|
509 |
+
|
510 |
+
print(f'Counting ImageNet files from {path}')
|
511 |
+
train_files = _list_image_files_recursively(os.path.join(path, 'train'))
|
512 |
+
class_names = [os.path.basename(path).split("_")[0] for path in train_files]
|
513 |
+
sorted_classes = {x: i for i, x in enumerate(sorted(set(class_names)))}
|
514 |
+
train_labels = [sorted_classes[x] for x in class_names]
|
515 |
+
print('Finish counting ImageNet files')
|
516 |
+
|
517 |
+
self.train = ImageDataset(resolution, train_files, labels=train_labels, random_crop=random_crop, random_flip=random_flip)
|
518 |
+
self.resolution = resolution
|
519 |
+
if len(self.train) != 1_281_167:
|
520 |
+
print(f'Missing train samples: {len(self.train)} < 1281167')
|
521 |
+
|
522 |
+
self.K = max(self.train.labels) + 1
|
523 |
+
cnt = dict(zip(*np.unique(self.train.labels, return_counts=True)))
|
524 |
+
self.cnt = torch.tensor([cnt[k] for k in range(self.K)]).float()
|
525 |
+
self.frac = [self.cnt[k] / len(self.train.labels) for k in range(self.K)]
|
526 |
+
print(f'{self.K} classes')
|
527 |
+
print(f'cnt[:10]: {self.cnt[:10]}')
|
528 |
+
print(f'frac[:10]: {self.frac[:10]}')
|
529 |
+
|
530 |
+
@property
|
531 |
+
def data_shape(self):
|
532 |
+
return 3, self.resolution, self.resolution
|
533 |
+
|
534 |
+
@property
|
535 |
+
def fid_stat(self):
|
536 |
+
return f'assets/fid_stats/fid_stats_imagenet{self.resolution}_guided_diffusion.npz'
|
537 |
+
|
538 |
+
def sample_label(self, n_samples, device):
|
539 |
+
return torch.multinomial(self.cnt, n_samples, replacement=True).to(device)
|
540 |
+
|
541 |
+
def label_prob(self, k):
|
542 |
+
return self.frac[k]
|
543 |
+
|
544 |
+
class MajorTOMThumbnail(DatasetFactory):
|
545 |
+
def __init__(self, path, resolution):
|
546 |
+
super().__init__()
|
547 |
+
|
548 |
+
print(f'Counting MajorTOM thumbnail files from {path}')
|
549 |
+
files_list = _list_image_files_recursively(path)
|
550 |
+
print('Finish counting MajorTOM thumbnail files')
|
551 |
+
|
552 |
+
self.dataset = MajorTOMThumbnailDataset(resolution, files_list)
|
553 |
+
self.resolution = resolution
|
554 |
+
if len(self.dataset) != 1_281_167:
|
555 |
+
print(f'Missing train samples: {len(self.dataset)} < 1281167')
|
556 |
+
|
557 |
+
@property
|
558 |
+
def data_shape(self):
|
559 |
+
return 3, self.resolution, self.resolution
|
560 |
+
|
561 |
+
@property
|
562 |
+
def has_label(self):
|
563 |
+
return False
|
564 |
+
|
565 |
+
@property
|
566 |
+
def fid_stat(self):
|
567 |
+
return f'assets/fid_stats/fid_stats_imagenet{self.resolution}_guided_diffusion.npz'
|
568 |
+
|
569 |
+
|
570 |
+
class MajorTOMThumbnailDataset(Dataset):
|
571 |
+
def __init__(
|
572 |
+
self,
|
573 |
+
resolution,
|
574 |
+
image_paths,
|
575 |
+
):
|
576 |
+
super().__init__()
|
577 |
+
self.resolution = resolution
|
578 |
+
self.image_paths = image_paths
|
579 |
+
|
580 |
+
def __len__(self):
|
581 |
+
return len(self.image_paths)
|
582 |
+
|
583 |
+
def __getitem__(self, idx):
|
584 |
+
path = self.image_paths[idx]
|
585 |
+
filename = os.path.basename(path).split('.')[0]
|
586 |
+
pil_image = Image.open(path)
|
587 |
+
pil_image.load()
|
588 |
+
pil_image = pil_image.convert("RGB")
|
589 |
+
|
590 |
+
# check that the image has the correct resolution
|
591 |
+
if pil_image.size != (self.resolution, self.resolution):
|
592 |
+
raise ValueError(f"Image at {path} has size {pil_image.size}, expected {self.resolution}x{self.resolution}")
|
593 |
+
|
594 |
+
arr = np.array(pil_image).astype(np.float32) / 127.5 - 1
|
595 |
+
|
596 |
+
return np.transpose(arr, [2, 0, 1]), filename
|
597 |
+
|
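A minimal usage sketch of the two thumbnail classes above, assuming a local folder of pre-resized Major TOM thumbnails (the path, batch size, and worker count are illustrative, not part of this commit):

```python
from torch.utils.data import DataLoader
from datasets import MajorTOMThumbnail

# Hypothetical folder of 256x256 thumbnails; MajorTOMThumbnailDataset raises
# a ValueError if any image does not match the requested resolution.
factory = MajorTOMThumbnail(path='data/majortom_thumbnails', resolution=256)
loader = DataLoader(factory.dataset, batch_size=8, shuffle=False, num_workers=4)

for images, filenames in loader:
    # images: float32 tensor of shape (B, 3, 256, 256), scaled to [-1, 1]
    # filenames: tuple of thumbnail basenames (without extension)
    break
```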
598 |
+
def _list_image_files_recursively(data_dir):
|
599 |
+
results = []
|
600 |
+
for entry in sorted(os.listdir(data_dir)):
|
601 |
+
full_path = os.path.join(data_dir, entry)
|
602 |
+
ext = entry.split(".")[-1]
|
603 |
+
if "." in entry and ext.lower() in ["jpg", "jpeg", "png", "gif"]:
|
604 |
+
results.append(full_path)
|
605 |
+
elif os.listdir(full_path):
|
606 |
+
results.extend(_list_image_files_recursively(full_path))
|
607 |
+
return results
|
608 |
+
|
609 |
+
|
610 |
+
class ImageDataset(Dataset):
|
611 |
+
def __init__(
|
612 |
+
self,
|
613 |
+
resolution,
|
614 |
+
image_paths,
|
615 |
+
labels,
|
616 |
+
random_crop=False,
|
617 |
+
random_flip=True,
|
618 |
+
):
|
619 |
+
super().__init__()
|
620 |
+
self.resolution = resolution
|
621 |
+
self.image_paths = image_paths
|
622 |
+
self.labels = labels
|
623 |
+
self.random_crop = random_crop
|
624 |
+
self.random_flip = random_flip
|
625 |
+
|
626 |
+
def __len__(self):
|
627 |
+
return len(self.image_paths)
|
628 |
+
|
629 |
+
def __getitem__(self, idx):
|
630 |
+
path = self.image_paths[idx]
|
631 |
+
pil_image = Image.open(path)
|
632 |
+
pil_image.load()
|
633 |
+
pil_image = pil_image.convert("RGB")
|
634 |
+
|
635 |
+
if self.random_crop:
|
636 |
+
arr = random_crop_arr(pil_image, self.resolution)
|
637 |
+
else:
|
638 |
+
arr = center_crop_arr(pil_image, self.resolution)
|
639 |
+
|
640 |
+
if self.random_flip and random.random() < 0.5:
|
641 |
+
arr = arr[:, ::-1]
|
642 |
+
|
643 |
+
arr = arr.astype(np.float32) / 127.5 - 1
|
644 |
+
|
645 |
+
label = np.array(self.labels[idx], dtype=np.int64)
|
646 |
+
return np.transpose(arr, [2, 0, 1]), label
|
647 |
+
|
648 |
+
|
649 |
+
def center_crop_arr(pil_image, image_size):
|
650 |
+
# We are not on a new enough PIL to support the `reducing_gap`
|
651 |
+
# argument, which uses BOX downsampling at powers of two first.
|
652 |
+
# Thus, we do it by hand to improve downsample quality.
|
653 |
+
while min(*pil_image.size) >= 2 * image_size:
|
654 |
+
pil_image = pil_image.resize(
|
655 |
+
tuple(x // 2 for x in pil_image.size), resample=Image.BOX
|
656 |
+
)
|
657 |
+
|
658 |
+
scale = image_size / min(*pil_image.size)
|
659 |
+
pil_image = pil_image.resize(
|
660 |
+
tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
|
661 |
+
)
|
662 |
+
|
663 |
+
arr = np.array(pil_image)
|
664 |
+
crop_y = (arr.shape[0] - image_size) // 2
|
665 |
+
crop_x = (arr.shape[1] - image_size) // 2
|
666 |
+
return arr[crop_y : crop_y + image_size, crop_x : crop_x + image_size]
|
667 |
+
|
668 |
+
|
669 |
+
def random_crop_arr(pil_image, image_size, min_crop_frac=0.8, max_crop_frac=1.0):
|
670 |
+
min_smaller_dim_size = math.ceil(image_size / max_crop_frac)
|
671 |
+
max_smaller_dim_size = math.ceil(image_size / min_crop_frac)
|
672 |
+
smaller_dim_size = random.randrange(min_smaller_dim_size, max_smaller_dim_size + 1)
|
673 |
+
|
674 |
+
# We are not on a new enough PIL to support the `reducing_gap`
|
675 |
+
# argument, which uses BOX downsampling at powers of two first.
|
676 |
+
# Thus, we do it by hand to improve downsample quality.
|
677 |
+
while min(*pil_image.size) >= 2 * smaller_dim_size:
|
678 |
+
pil_image = pil_image.resize(
|
679 |
+
tuple(x // 2 for x in pil_image.size), resample=Image.BOX
|
680 |
+
)
|
681 |
+
|
682 |
+
scale = smaller_dim_size / min(*pil_image.size)
|
683 |
+
pil_image = pil_image.resize(
|
684 |
+
tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
|
685 |
+
)
|
686 |
+
|
687 |
+
arr = np.array(pil_image)
|
688 |
+
crop_y = random.randrange(arr.shape[0] - image_size + 1)
|
689 |
+
crop_x = random.randrange(arr.shape[1] - image_size + 1)
|
690 |
+
return arr[crop_y: crop_y + image_size, crop_x: crop_x + image_size]
|
691 |
+
|
692 |
+
|
693 |
+
# CelebA
|
694 |
+
|
695 |
+
|
696 |
+
class Crop(object):
|
697 |
+
def __init__(self, x1, x2, y1, y2):
|
698 |
+
self.x1 = x1
|
699 |
+
self.x2 = x2
|
700 |
+
self.y1 = y1
|
701 |
+
self.y2 = y2
|
702 |
+
|
703 |
+
def __call__(self, img):
|
704 |
+
return F.crop(img, self.x1, self.y1, self.x2 - self.x1, self.y2 - self.y1)
|
705 |
+
|
706 |
+
def __repr__(self):
|
707 |
+
return self.__class__.__name__ + "(x1={}, x2={}, y1={}, y2={})".format(
|
708 |
+
self.x1, self.x2, self.y1, self.y2
|
709 |
+
)
|
710 |
+
|
711 |
+
|
712 |
+
class CelebA(DatasetFactory):
|
713 |
+
r""" train: 162,770
|
714 |
+
val: 19,867
|
715 |
+
test: 19,962
|
716 |
+
shape: 3 * width * width
|
717 |
+
"""
|
718 |
+
|
719 |
+
def __init__(self, path, resolution=64):
|
720 |
+
super().__init__()
|
721 |
+
|
722 |
+
self.resolution = resolution
|
723 |
+
|
724 |
+
cx = 89
|
725 |
+
cy = 121
|
726 |
+
x1 = cy - 64
|
727 |
+
x2 = cy + 64
|
728 |
+
y1 = cx - 64
|
729 |
+
y2 = cx + 64
|
730 |
+
|
731 |
+
transform = transforms.Compose([Crop(x1, x2, y1, y2), transforms.Resize(self.resolution),
|
732 |
+
transforms.RandomHorizontalFlip(), transforms.ToTensor(),
|
733 |
+
transforms.Normalize(0.5, 0.5)])
|
734 |
+
self.train = datasets.CelebA(root=path, split="train", target_type=[], transform=transform, download=True)
|
735 |
+
self.train = UnlabeledDataset(self.train)
|
736 |
+
|
737 |
+
@property
|
738 |
+
def data_shape(self):
|
739 |
+
return 3, self.resolution, self.resolution
|
740 |
+
|
741 |
+
@property
|
742 |
+
def fid_stat(self):
|
743 |
+
return 'assets/fid_stats/fid_stats_celeba64_train_50000_ddim.npz'
|
744 |
+
|
745 |
+
@property
|
746 |
+
def has_label(self):
|
747 |
+
return False
|
748 |
+
|
749 |
+
|
750 |
+
# MS COCO
|
751 |
+
|
752 |
+
|
753 |
+
def center_crop(width, height, img):
|
754 |
+
resample = {'box': Image.BOX, 'lanczos': Image.LANCZOS}['lanczos']
|
755 |
+
crop = np.min(img.shape[:2])
|
756 |
+
img = img[(img.shape[0] - crop) // 2: (img.shape[0] + crop) // 2,
|
757 |
+
(img.shape[1] - crop) // 2: (img.shape[1] + crop) // 2]
|
758 |
+
try:
|
759 |
+
img = Image.fromarray(img, 'RGB')
|
760 |
+
except:
|
761 |
+
img = Image.fromarray(img)
|
762 |
+
img = img.resize((width, height), resample)
|
763 |
+
|
764 |
+
return np.array(img).astype(np.uint8)
|
765 |
+
|
766 |
+
|
767 |
+
class MSCOCODatabase(Dataset):
|
768 |
+
def __init__(self, root, annFile, size=None):
|
769 |
+
from pycocotools.coco import COCO
|
770 |
+
self.root = root
|
771 |
+
self.height = self.width = size
|
772 |
+
|
773 |
+
self.coco = COCO(annFile)
|
774 |
+
self.keys = list(sorted(self.coco.imgs.keys()))
|
775 |
+
|
776 |
+
def _load_image(self, key: int):
|
777 |
+
path = self.coco.loadImgs(key)[0]["file_name"]
|
778 |
+
return Image.open(os.path.join(self.root, path)).convert("RGB")
|
779 |
+
|
780 |
+
def _load_target(self, key: int):
|
781 |
+
return self.coco.loadAnns(self.coco.getAnnIds(key))
|
782 |
+
|
783 |
+
def __len__(self):
|
784 |
+
return len(self.keys)
|
785 |
+
|
786 |
+
def __getitem__(self, index):
|
787 |
+
key = self.keys[index]
|
788 |
+
image = self._load_image(key)
|
789 |
+
image = np.array(image).astype(np.uint8)
|
790 |
+
image = center_crop(self.width, self.height, image).astype(np.float32)
|
791 |
+
image = (image / 127.5 - 1.0).astype(np.float32)
|
792 |
+
image = einops.rearrange(image, 'h w c -> c h w')
|
793 |
+
|
794 |
+
anns = self._load_target(key)
|
795 |
+
target = []
|
796 |
+
for ann in anns:
|
797 |
+
target.append(ann['caption'])
|
798 |
+
|
799 |
+
return image, target
|
800 |
+
|
801 |
+
|
802 |
+
def get_feature_dir_info(root):
|
803 |
+
files = glob.glob(os.path.join(root, '*.npy'))
|
804 |
+
files_caption = glob.glob(os.path.join(root, '*_*.npy'))
|
805 |
+
num_data = len(files) - len(files_caption)
|
806 |
+
n_captions = {k: 0 for k in range(num_data)}
|
807 |
+
for f in files_caption:
|
808 |
+
name = os.path.split(f)[-1]
|
809 |
+
k1, k2 = os.path.splitext(name)[0].split('_')
|
810 |
+
n_captions[int(k1)] += 1
|
811 |
+
return num_data, n_captions
|
812 |
+
|
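For reference, a sketch of the flat directory layout that `get_feature_dir_info` above and `MSCOCOFeatureDataset` below expect, following the `{index}.npy` / `{index}_{caption}.npy` naming read in `__getitem__` (the folder path is hypothetical):

```python
# features/train/
#   0.npy      latent moments of image 0
#   0_0.npy    text embedding of caption 0 for image 0
#   0_1.npy    text embedding of caption 1 for image 0
#   1.npy      latent moments of image 1
#   ...
from datasets import get_feature_dir_info

num_data, n_captions = get_feature_dir_info('features/train')
print(num_data)        # number of images (files without an underscore)
print(n_captions[0])   # number of caption files found for image 0
```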
813 |
+
|
814 |
+
class MSCOCOFeatureDataset(Dataset):
|
815 |
+
# the image features are got through sample
|
816 |
+
def __init__(self, root):
|
817 |
+
self.root = root
|
818 |
+
self.num_data, self.n_captions = get_feature_dir_info(root)
|
819 |
+
|
820 |
+
def __len__(self):
|
821 |
+
return self.num_data
|
822 |
+
|
823 |
+
def __getitem__(self, index):
|
824 |
+
z = np.load(os.path.join(self.root, f'{index}.npy'))
|
825 |
+
k = random.randint(0, self.n_captions[index] - 1)
|
826 |
+
c = np.load(os.path.join(self.root, f'{index}_{k}.npy'))
|
827 |
+
return z, c
|
828 |
+
|
829 |
+
|
830 |
+
class MSCOCO256Features(DatasetFactory): # the moments calculated by Stable Diffusion image encoder & the contexts calculated by clip
|
831 |
+
def __init__(self, path, cfg=False, p_uncond=None):
|
832 |
+
super().__init__()
|
833 |
+
print('Prepare dataset...')
|
834 |
+
self.train = MSCOCOFeatureDataset(os.path.join(path, 'train'))
|
835 |
+
self.test = MSCOCOFeatureDataset(os.path.join(path, 'val'))
|
836 |
+
assert len(self.train) == 82783
|
837 |
+
assert len(self.test) == 40504
|
838 |
+
print('Prepare dataset ok')
|
839 |
+
|
840 |
+
self.empty_context = np.load(os.path.join(path, 'empty_context.npy'))
|
841 |
+
|
842 |
+
if cfg: # classifier free guidance
|
843 |
+
assert p_uncond is not None
|
844 |
+
print(f'prepare the dataset for classifier free guidance with p_uncond={p_uncond}')
|
845 |
+
self.train = CFGDataset(self.train, p_uncond, self.empty_context)
|
846 |
+
|
847 |
+
# text embedding extracted by clip
|
848 |
+
# for visualization in t2i
|
849 |
+
self.prompts, self.contexts = [], []
|
850 |
+
for f in sorted(os.listdir(os.path.join(path, 'run_vis')), key=lambda x: int(x.split('.')[0])):
|
851 |
+
prompt, context = np.load(os.path.join(path, 'run_vis', f), allow_pickle=True)
|
852 |
+
self.prompts.append(prompt)
|
853 |
+
self.contexts.append(context)
|
854 |
+
self.contexts = np.array(self.contexts)
|
855 |
+
|
856 |
+
@property
|
857 |
+
def data_shape(self):
|
858 |
+
return 4, 32, 32
|
859 |
+
|
860 |
+
@property
|
861 |
+
def fid_stat(self):
|
862 |
+
return f'assets/fid_stats/fid_stats_mscoco256_val.npz'
|
863 |
+
|
864 |
+
|
865 |
+
def get_dataset(name, **kwargs):
|
866 |
+
if name == 'cifar10':
|
867 |
+
return CIFAR10(**kwargs)
|
868 |
+
elif name == 'imagenet':
|
869 |
+
return ImageNet(**kwargs)
|
870 |
+
elif name == 'imagenet256_features':
|
871 |
+
return ImageNet256Features(**kwargs)
|
872 |
+
elif name == 'imagenet512_features':
|
873 |
+
return ImageNet512Features(**kwargs)
|
874 |
+
elif name == "majorTOM_S2_256_features":
|
875 |
+
return MajorTOM_S2_Features(**kwargs)
|
876 |
+
elif name == "majorTOM_tuples_256_features":
|
877 |
+
return MajorTOM_Tuples_Features(**kwargs)
|
878 |
+
elif name == "majorTOM_lmdb_256_features":
|
879 |
+
return MajorTOM_Lmdb_Features(**kwargs)
|
880 |
+
elif name == 'celeba':
|
881 |
+
return CelebA(**kwargs)
|
882 |
+
elif name == 'mscoco256_features':
|
883 |
+
return MSCOCO256Features(**kwargs)
|
884 |
+
else:
|
885 |
+
raise NotImplementedError(name)
|
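A short usage sketch of the factory entry point above; the dataset name must be one of the keys handled in `get_dataset`, and the path and classifier-free-guidance settings here are placeholders:

```python
from datasets import get_dataset

# Latent ImageNet-256 features with a 10% unconditional dropout for CFG.
dataset = get_dataset('imagenet256_features',
                      path='data/imagenet256_features',
                      cfg=True, p_uncond=0.1)
print(dataset.data_shape)   # (4, 32, 32) latent moments from the SD encoder
print(dataset.fid_stat)     # path to the matching pre-computed FID statistics
```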
src/COP-GEN-Beta/dpm_solver_pp.py
ADDED
@@ -0,0 +1,952 @@
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import math
|
4 |
+
import numpy as np
|
5 |
+
import torch.distributed as dist
|
6 |
+
|
7 |
+
|
8 |
+
def interpolate_fn(x: torch.Tensor, xp: torch.Tensor, yp: torch.Tensor) -> torch.Tensor:
|
9 |
+
"""Performs piecewise linear interpolation for x, using xp and yp keypoints (knots).
|
10 |
+
Performs separate interpolation for each channel.
|
11 |
+
Args:
|
12 |
+
x: [N, C] points to be calibrated (interpolated). Batch with C channels.
|
13 |
+
xp: [C, K] x coordinates of the PWL knots. C is the number of channels, K is the number of knots.
|
14 |
+
yp: [C, K] y coordinates of the PWL knots. C is the number of channels, K is the number of knots.
|
15 |
+
Returns:
|
16 |
+
Interpolated points of the shape [N, C].
|
17 |
+
The piecewise linear function extends for the whole x axis (the outermost keypoints define the outermost
|
18 |
+
infinite lines).
|
19 |
+
For example:
|
20 |
+
>>> interpolate_fn(torch.tensor([[0.5]]), torch.tensor([[0.0, 1.0]]), torch.tensor([[0.0, 2.0]]))
|
21 |
+
tensor([[1.0000]])
|
22 |
+
>>> interpolate_fn(torch.tensor([[-10]]), torch.tensor([[0.0, 1.0]]), torch.tensor([[0.0, 2.0]]))
|
23 |
+
tensor([[-20.0000]])
|
24 |
+
"""
|
25 |
+
x_breakpoints = torch.cat([x.unsqueeze(2), xp.unsqueeze(0).repeat((x.shape[0], 1, 1))], dim=2)
|
26 |
+
num_x_points = xp.shape[1]
|
27 |
+
sorted_x_breakpoints, x_indices = torch.sort(x_breakpoints, dim=2)
|
28 |
+
x_idx = torch.argmin(x_indices, dim=2)
|
29 |
+
cand_start_idx = x_idx - 1
|
30 |
+
start_idx = torch.where(
|
31 |
+
torch.eq(x_idx, 0),
|
32 |
+
torch.tensor(1, device=x.device),
|
33 |
+
torch.where(
|
34 |
+
torch.eq(x_idx, num_x_points), torch.tensor(num_x_points - 2, device=x.device), cand_start_idx,
|
35 |
+
),
|
36 |
+
)
|
37 |
+
end_idx = torch.where(torch.eq(start_idx, cand_start_idx), start_idx + 2, start_idx + 1)
|
38 |
+
start_x = torch.gather(sorted_x_breakpoints, dim=2, index=start_idx.unsqueeze(2)).squeeze(2)
|
39 |
+
end_x = torch.gather(sorted_x_breakpoints, dim=2, index=end_idx.unsqueeze(2)).squeeze(2)
|
40 |
+
start_idx2 = torch.where(
|
41 |
+
torch.eq(x_idx, 0),
|
42 |
+
torch.tensor(0, device=x.device),
|
43 |
+
torch.where(
|
44 |
+
torch.eq(x_idx, num_x_points), torch.tensor(num_x_points - 2, device=x.device), cand_start_idx,
|
45 |
+
),
|
46 |
+
)
|
47 |
+
y_positions_expanded = yp.unsqueeze(0).expand(x.shape[0], -1, -1)
|
48 |
+
start_y = torch.gather(y_positions_expanded, dim=2, index=start_idx2.unsqueeze(2)).squeeze(2)
|
49 |
+
end_y = torch.gather(y_positions_expanded, dim=2, index=(start_idx2 + 1).unsqueeze(2)).squeeze(2)
|
50 |
+
cand = start_y + (x - start_x) * (end_y - start_y) / (end_x - start_x)
|
51 |
+
return cand
|
52 |
+
|
53 |
+
|
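A self-contained check of `interpolate_fn`, reproducing the two docstring cases above (one channel, knots at x = [0, 1], y = [0, 2]); the import path assumes this module is used as `dpm_solver_pp`:

```python
import torch
from dpm_solver_pp import interpolate_fn

x  = torch.tensor([[0.5], [-10.0]])   # [N, C] query points, N=2, C=1
xp = torch.tensor([[0.0, 1.0]])       # [C, K] knot x-coordinates
yp = torch.tensor([[0.0, 2.0]])       # [C, K] knot y-coordinates

print(interpolate_fn(x, xp, yp))
# tensor([[  1.],
#         [-20.]])   # x = -10 extrapolates along the outermost segment
```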
54 |
+
class NoiseScheduleVP:
|
55 |
+
def __init__(self, schedule='discrete', beta_0=1e-4, beta_1=2e-2, total_N=1000, betas=None, alphas_cumprod=None):
|
56 |
+
"""Create a wrapper class for the forward SDE (VP type).
|
57 |
+
|
58 |
+
The forward SDE ensures that the condition distribution q_{t|0}(x_t | x_0) = N ( alpha_t * x_0, sigma_t^2 * I ).
|
59 |
+
We further define lambda_t = log(alpha_t) - log(sigma_t), which is the half-logSNR (described in the DPM-Solver paper).
|
60 |
+
Therefore, we implement the functions for computing alpha_t, sigma_t and lambda_t. For t in [0, T], we have:
|
61 |
+
|
62 |
+
log_alpha_t = self.marginal_log_mean_coeff(t)
|
63 |
+
sigma_t = self.marginal_std(t)
|
64 |
+
lambda_t = self.marginal_lambda(t)
|
65 |
+
|
66 |
+
Moreover, as lambda(t) is an invertible function, we also support its inverse function:
|
67 |
+
|
68 |
+
t = self.inverse_lambda(lambda_t)
|
69 |
+
|
70 |
+
===============================================================
|
71 |
+
|
72 |
+
We support two types of VPSDEs: linear (DDPM) and cosine (improved-DDPM). The hyperparameters for the noise
|
73 |
+
schedule are the default settings in DDPM and improved-DDPM:
|
74 |
+
|
75 |
+
beta_min: A `float` number. The smallest beta for the linear schedule.
|
76 |
+
beta_max: A `float` number. The largest beta for the linear schedule.
|
77 |
+
cosine_s: A `float` number. The hyperparameter in the cosine schedule.
|
78 |
+
cosine_beta_max: A `float` number. The hyperparameter in the cosine schedule.
|
79 |
+
T: A `float` number. The ending time of the forward process.
|
80 |
+
|
81 |
+
Note that the original DDPM (linear schedule) used the discrete-time label (0 to 999). We convert the discrete-time
|
82 |
+
label to continuous time (following Song et al., 2021), so the beta here is 1000x larger than those in DDPM.
|
83 |
+
|
84 |
+
===============================================================
|
85 |
+
|
86 |
+
Args:
|
87 |
+
schedule: A `str`. The noise schedule of the forward SDE ('discrete', 'linear' or 'cosine').
|
88 |
+
|
89 |
+
Returns:
|
90 |
+
A wrapper object of the forward SDE (VP type).
|
91 |
+
"""
|
92 |
+
if schedule not in ['linear', 'discrete', 'cosine']:
|
93 |
+
raise ValueError("Unsupported noise schedule {}. The schedule needs to be 'linear' or 'cosine'".format(schedule))
|
94 |
+
self.total_N = total_N
|
95 |
+
self.beta_0 = beta_0 * 1000.
|
96 |
+
self.beta_1 = beta_1 * 1000.
|
97 |
+
|
98 |
+
if schedule == 'discrete':
|
99 |
+
if betas is not None:
|
100 |
+
log_alphas = 0.5 * torch.log(1 - betas).cumsum(dim=0)
|
101 |
+
else:
|
102 |
+
assert alphas_cumprod is not None
|
103 |
+
log_alphas = 0.5 * torch.log(alphas_cumprod)
|
104 |
+
self.total_N = len(log_alphas)
|
105 |
+
self.t_discrete = torch.linspace(1. / self.total_N, 1., self.total_N).reshape((1, -1))
|
106 |
+
self.log_alpha_discrete = log_alphas.reshape((1, -1))
|
107 |
+
|
108 |
+
self.cosine_s = 0.008
|
109 |
+
self.cosine_beta_max = 999.
|
110 |
+
self.cosine_t_max = math.atan(self.cosine_beta_max * (1. + self.cosine_s) / math.pi) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s
|
111 |
+
self.cosine_log_alpha_0 = math.log(math.cos(self.cosine_s / (1. + self.cosine_s) * math.pi / 2.))
|
112 |
+
self.schedule = schedule
|
113 |
+
if schedule == 'cosine':
|
114 |
+
# For the cosine schedule, T = 1 will have numerical issues. So we manually set the ending time T.
|
115 |
+
# Note that T = 0.9946 may be not the optimal setting. However, we find it works well.
|
116 |
+
self.T = 0.9946
|
117 |
+
else:
|
118 |
+
self.T = 1.
|
119 |
+
|
120 |
+
def marginal_log_mean_coeff(self, t):
|
121 |
+
"""
|
122 |
+
Compute log(alpha_t) of a given continuous-time label t in [0, T].
|
123 |
+
"""
|
124 |
+
if self.schedule == 'linear':
|
125 |
+
return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0
|
126 |
+
elif self.schedule == 'discrete':
|
127 |
+
return interpolate_fn(t.reshape((-1, 1)), self.t_discrete.clone().to(t.device), self.log_alpha_discrete.clone().to(t.device)).reshape((-1,))
|
128 |
+
elif self.schedule == 'cosine':
|
129 |
+
log_alpha_fn = lambda s: torch.log(torch.cos((s + self.cosine_s) / (1. + self.cosine_s) * math.pi / 2.))
|
130 |
+
log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0
|
131 |
+
return log_alpha_t
|
132 |
+
else:
|
133 |
+
raise ValueError("Unsupported ")
|
134 |
+
|
135 |
+
def marginal_alpha(self, t):
|
136 |
+
return torch.exp(self.marginal_log_mean_coeff(t))
|
137 |
+
|
138 |
+
def marginal_std(self, t):
|
139 |
+
"""
|
140 |
+
Compute sigma_t of a given continuous-time label t in [0, T].
|
141 |
+
"""
|
142 |
+
return torch.sqrt(1. - torch.exp(2. * self.marginal_log_mean_coeff(t)))
|
143 |
+
|
144 |
+
def marginal_lambda(self, t):
|
145 |
+
"""
|
146 |
+
Compute lambda_t = log(alpha_t) - log(sigma_t) of a given continuous-time label t in [0, T].
|
147 |
+
"""
|
148 |
+
log_mean_coeff = self.marginal_log_mean_coeff(t)
|
149 |
+
log_std = 0.5 * torch.log(1. - torch.exp(2. * log_mean_coeff))
|
150 |
+
return log_mean_coeff - log_std
|
151 |
+
|
152 |
+
def inverse_lambda(self, lamb):
|
153 |
+
"""
|
154 |
+
Compute the continuous-time label t in [0, T] of a given half-logSNR lambda_t.
|
155 |
+
"""
|
156 |
+
if self.schedule == 'linear':
|
157 |
+
tmp = 2. * (self.beta_1 - self.beta_0) * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb))
|
158 |
+
Delta = self.beta_0**2 + tmp
|
159 |
+
return tmp / (torch.sqrt(Delta) + self.beta_0) / (self.beta_1 - self.beta_0)
|
160 |
+
elif self.schedule == 'discrete':
|
161 |
+
log_alpha = -0.5 * torch.logaddexp(torch.zeros((1,)).to(lamb.device), -2. * lamb)
|
162 |
+
t = interpolate_fn(log_alpha.reshape((-1, 1)), torch.flip(self.log_alpha_discrete.clone().to(lamb.device), [1]), torch.flip(self.t_discrete.clone().to(lamb.device), [1]))
|
163 |
+
return t.reshape((-1,))
|
164 |
+
else:
|
165 |
+
log_alpha = -0.5 * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb))
|
166 |
+
t_fn = lambda log_alpha_t: torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s
|
167 |
+
t = t_fn(log_alpha)
|
168 |
+
return t
|
169 |
+
|
170 |
+
|
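A minimal sketch of building a discrete-time noise schedule and querying the quantities defined above (the linear beta range is an illustrative DDPM-style choice, not a value fixed by this file):

```python
import torch
from dpm_solver_pp import NoiseScheduleVP

betas = torch.linspace(1e-4, 2e-2, 1000)          # assumed training betas
ns = NoiseScheduleVP(schedule='discrete', betas=betas)

t = torch.tensor([0.5])
alpha_t  = ns.marginal_alpha(t)          # alpha_t of q_{t|0}(x_t | x_0)
sigma_t  = ns.marginal_std(t)            # sigma_t of q_{t|0}(x_t | x_0)
lambda_t = ns.marginal_lambda(t)         # half-logSNR: log(alpha_t) - log(sigma_t)
t_back   = ns.inverse_lambda(lambda_t)   # recovers t up to interpolation error
```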
171 |
+
def model_wrapper(model, noise_schedule=None, is_cond_classifier=False, classifier_fn=None, classifier_scale=1., time_input_type='1', total_N=1000, model_kwargs={}, is_deis=False):
|
172 |
+
"""Create a wrapper function for the noise prediction model.
|
173 |
+
|
174 |
+
DPM-Solver needs to solve the continuous-time diffusion ODEs. For DPMs trained on discrete-time labels, we need to
|
175 |
+
first wrap the model function into a function that accepts the continuous time as the input.
|
176 |
+
|
177 |
+
The input `model` has the following format:
|
178 |
+
|
179 |
+
``
|
180 |
+
model(x, t_input, **model_kwargs) -> noise
|
181 |
+
``
|
182 |
+
|
183 |
+
where `x` and `noise` have the same shape, and `t_input` is the time label of the model.
|
184 |
+
(may be discrete-time labels (i.e. 0 to 999) or continuous-time labels (i.e. epsilon to T).)
|
185 |
+
|
186 |
+
We wrap the model function to the following format:
|
187 |
+
|
188 |
+
``
|
189 |
+
def model_fn(x, t_continuous) -> noise:
|
190 |
+
t_input = get_model_input_time(t_continuous)
|
191 |
+
return model(x, t_input, **model_kwargs)
|
192 |
+
``
|
193 |
+
|
194 |
+
where `t_continuous` is the continuous time labels (i.e. epsilon to T). And we use `model_fn` for DPM-Solver.
|
195 |
+
|
196 |
+
For DPMs with classifier guidance, we also combine the model output with the classifier gradient as used in [1].
|
197 |
+
|
198 |
+
[1] P. Dhariwal and A. Q. Nichol, "Diffusion models beat GANs on image synthesis," in Advances in Neural
|
199 |
+
Information Processing Systems, vol. 34, 2021, pp. 8780-8794.
|
200 |
+
|
201 |
+
===============================================================
|
202 |
+
|
203 |
+
Args:
|
204 |
+
model: A noise prediction model with the following format:
|
205 |
+
``
|
206 |
+
def model(x, t_input, **model_kwargs):
|
207 |
+
return noise
|
208 |
+
``
|
209 |
+
noise_schedule: A noise schedule object, such as NoiseScheduleVP. Only used for the classifier guidance.
|
210 |
+
is_cond_classifier: A `bool`. Whether to use the classifier guidance.
|
211 |
+
classifier_fn: A classifier function. Only used for the classifier guidance. The format is:
|
212 |
+
``
|
213 |
+
def classifier_fn(x, t_input):
|
214 |
+
return logits
|
215 |
+
``
|
216 |
+
classifier_scale: A `float`. The scale for the classifier guidance.
|
217 |
+
time_input_type: A `str`. The type for the time input of the model. We support three types:
|
218 |
+
- '0': The continuous-time type. In this case, the model is trained on the continuous time,
|
219 |
+
so `t_input` = `t_continuous`.
|
220 |
+
- '1': The Type-1 discrete type described in the Appendix of DPM-Solver paper.
|
221 |
+
**For discrete-time DPMs, we recommend to use this type for DPM-Solver**.
|
222 |
+
- '2': The Type-2 discrete type described in the Appendix of DPM-Solver paper.
|
223 |
+
total_N: A `int`. The total number of the discrete-time DPMs (default is 1000), used when `time_input_type`
|
224 |
+
is '1' or '2'.
|
225 |
+
model_kwargs: A `dict`. A dict for the other inputs of the model function.
|
226 |
+
Returns:
|
227 |
+
A function that accepts the continuous time as the input, with the following format:
|
228 |
+
``
|
229 |
+
def model_fn(x, t_continuous):
|
230 |
+
t_input = get_model_input_time(t_continuous)
|
231 |
+
return model(x, t_input, **model_kwargs)
|
232 |
+
``
|
233 |
+
"""
|
234 |
+
def get_model_input_time(t_continuous):
|
235 |
+
"""
|
236 |
+
Convert the continuous-time `t_continuous` (in [epsilon, T]) to the model input time.
|
237 |
+
"""
|
238 |
+
if time_input_type == '0':
|
239 |
+
# time_input_type == '0' means that the model is a continuous-time model.
|
240 |
+
# For continuous-time DPMs, the continuous time equals to the discrete time.
|
241 |
+
return t_continuous
|
242 |
+
elif time_input_type == '1':
|
243 |
+
# Type-1 discrete label, as detailed in the Appendix of DPM-Solver.
|
244 |
+
return 1000. * torch.max(t_continuous - 1. / total_N, torch.zeros_like(t_continuous).to(t_continuous))
|
245 |
+
elif time_input_type == '2':
|
246 |
+
# Type-2 discrete label, as detailed in the Appendix of DPM-Solver.
|
247 |
+
max_N = (total_N - 1) / total_N * 1000.
|
248 |
+
return max_N * t_continuous
|
249 |
+
else:
|
250 |
+
raise ValueError("Unsupported time input type {}, must be '0' or '1' or '2'".format(time_input_type))
|
251 |
+
|
252 |
+
def cond_fn(x, t_discrete, y):
|
253 |
+
"""
|
254 |
+
Compute the gradient of the classifier, multiplied by the scale of the classifier guidance.
|
255 |
+
"""
|
256 |
+
assert y is not None
|
257 |
+
with torch.enable_grad():
|
258 |
+
x_in = x.detach().requires_grad_(True)
|
259 |
+
logits = classifier_fn(x_in, t_discrete)
|
260 |
+
log_probs = F.log_softmax(logits, dim=-1)
|
261 |
+
selected = log_probs[range(len(logits)), y.view(-1)]
|
262 |
+
return classifier_scale * torch.autograd.grad(selected.sum(), x_in)[0]
|
263 |
+
|
264 |
+
def model_fn(x, t_continuous):
|
265 |
+
"""
|
266 |
+
The noise prediction model function that is used for DPM-Solver.
|
267 |
+
"""
|
268 |
+
if t_continuous.reshape((-1,)).shape[0] == 1:
|
269 |
+
t_continuous = torch.ones((x.shape[0],)).to(x.device) * t_continuous
|
270 |
+
if is_cond_classifier:
|
271 |
+
y = model_kwargs.get("y", None)
|
272 |
+
if y is None:
|
273 |
+
raise ValueError("For classifier guidance, the label y has to be in the input.")
|
274 |
+
t_discrete = get_model_input_time(t_continuous)
|
275 |
+
noise_uncond = model(x, t_discrete, **model_kwargs)
|
276 |
+
cond_grad = cond_fn(x, t_discrete, y)
|
277 |
+
if is_deis:
|
278 |
+
sigma_t = noise_schedule.marginal_std(t_continuous / 1000.)
|
279 |
+
else:
|
280 |
+
sigma_t = noise_schedule.marginal_std(t_continuous)
|
281 |
+
dims = len(cond_grad.shape) - 1
|
282 |
+
return noise_uncond - sigma_t[(...,) + (None,)*dims] * cond_grad
|
283 |
+
else:
|
284 |
+
t_discrete = get_model_input_time(t_continuous)
|
285 |
+
return model(x, t_discrete, **model_kwargs)
|
286 |
+
|
287 |
+
return model_fn
|
288 |
+
|
289 |
+
|
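A hedged sketch of wrapping a discrete-time noise prediction network with `model_wrapper` so it can be passed to `DPM_Solver` below; `toy_noise_model` is a stand-in for a trained network and is not part of this commit:

```python
import torch
from dpm_solver_pp import NoiseScheduleVP, model_wrapper, DPM_Solver

def toy_noise_model(x, t_input):
    # Placeholder for a trained network: must return a tensor shaped like x.
    return torch.zeros_like(x)

betas = torch.linspace(1e-4, 2e-2, 1000)
ns = NoiseScheduleVP(schedule='discrete', betas=betas)

# Type-1 discrete time labels, as recommended in the docstring above.
model_fn = model_wrapper(toy_noise_model, noise_schedule=ns,
                         time_input_type='1', total_N=1000)

x = torch.randn(2, 4, 32, 32)
noise = model_fn(x, torch.tensor([0.5]))   # continuous time in (0, 1]
solver = DPM_Solver(model_fn, ns, predict_x0=True, thresholding=False)
```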
290 |
+
class DPM_Solver:
|
291 |
+
def __init__(self, model_fn, noise_schedule, predict_x0=False, thresholding=False, max_val=1.):
|
292 |
+
"""Construct a DPM-Solver.
|
293 |
+
|
294 |
+
Args:
|
295 |
+
model_fn: A noise prediction model function which accepts the continuous-time input
|
296 |
+
(t in [epsilon, T]):
|
297 |
+
``
|
298 |
+
def model_fn(x, t_continuous):
|
299 |
+
return noise
|
300 |
+
``
|
301 |
+
noise_schedule: A noise schedule object, such as NoiseScheduleVP.
|
302 |
+
"""
|
303 |
+
self.model = model_fn
|
304 |
+
self.noise_schedule = noise_schedule
|
305 |
+
self.predict_x0 = predict_x0
|
306 |
+
self.thresholding = thresholding
|
307 |
+
self.max_val = max_val
|
308 |
+
|
309 |
+
def model_fn(self, x, t):
|
310 |
+
if self.predict_x0:
|
311 |
+
alpha_t, sigma_t = self.noise_schedule.marginal_alpha(t), self.noise_schedule.marginal_std(t)
|
312 |
+
noise = self.model(x, t)
|
313 |
+
dims = len(x.shape) - 1
|
314 |
+
x0 = (x - sigma_t[(...,) + (None,)*dims] * noise) / alpha_t[(...,) + (None,)*dims]
|
315 |
+
if self.thresholding:
|
316 |
+
p = 0.995
|
317 |
+
s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1)
|
318 |
+
s = torch.maximum(s, torch.ones_like(s).to(s.device))[(...,) + (None,)*dims]
|
319 |
+
x0 = torch.clamp(x0, -s, s) / (s / self.max_val)
|
320 |
+
return x0
|
321 |
+
else:
|
322 |
+
return self.model(x, t)
|
323 |
+
|
324 |
+
def get_time_steps(self, skip_type, t_T, t_0, N, device):
|
325 |
+
"""Compute the intermediate time steps for sampling.
|
326 |
+
|
327 |
+
Args:
|
328 |
+
skip_type: A `str`. The type for the spacing of the time steps. We support three types:
|
329 |
+
- 'logSNR': uniform logSNR for the time steps, **recommended for DPM-Solver**.
|
330 |
+
- 'time_uniform': uniform time for the time steps. (Used in DDIM and DDPM.)
|
331 |
+
- 'time_quadratic': quadratic time for the time steps. (Used in DDIM for low-resolution data.)
|
332 |
+
t_T: A `float`. The starting time of the sampling (default is T).
|
333 |
+
t_0: A `float`. The ending time of the sampling (default is epsilon).
|
334 |
+
N: A `int`. The total number of the spacing of the time steps.
|
335 |
+
device: A torch device.
|
336 |
+
Returns:
|
337 |
+
A pytorch tensor of the time steps, with the shape (N + 1,).
|
338 |
+
"""
|
339 |
+
if skip_type == 'logSNR':
|
340 |
+
lambda_T = self.noise_schedule.marginal_lambda(torch.tensor(t_T).to(device))
|
341 |
+
lambda_0 = self.noise_schedule.marginal_lambda(torch.tensor(t_0).to(device))
|
342 |
+
logSNR_steps = torch.linspace(lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1).to(device)
|
343 |
+
# print(torch.min(torch.abs(logSNR_steps - self.noise_schedule.marginal_lambda(self.noise_schedule.inverse_lambda(logSNR_steps)))).item())
|
344 |
+
return self.noise_schedule.inverse_lambda(logSNR_steps)
|
345 |
+
elif skip_type == 't2':
|
346 |
+
t_order = 2
|
347 |
+
t = torch.linspace(t_T**(1. / t_order), t_0**(1. / t_order), N + 1).pow(t_order).to(device)
|
348 |
+
return t
|
349 |
+
elif skip_type == 'time_uniform':
|
350 |
+
return torch.linspace(t_T, t_0, N + 1).to(device)
|
351 |
+
elif skip_type == 'time_quadratic':
|
352 |
+
t = torch.linspace(t_0, t_T, 10000000).to(device)
|
353 |
+
quadratic_t = torch.sqrt(t)
|
354 |
+
quadratic_steps = torch.linspace(quadratic_t[0], quadratic_t[-1], N + 1).to(device)
|
355 |
+
return torch.flip(torch.cat([t[torch.searchsorted(quadratic_t, quadratic_steps)[:-1]], t_T * torch.ones((1,)).to(device)], dim=0), dims=[0])
|
356 |
+
else:
|
357 |
+
raise ValueError("Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format(skip_type))
|
358 |
+
|
359 |
+
def get_time_steps_for_dpm_solver_fast(self, skip_type, t_T, t_0, steps, order, device):
|
360 |
+
"""
|
361 |
+
Compute the intermediate time steps and the order of each step for sampling by DPM-Solver-fast.
|
362 |
+
|
363 |
+
We recommend DPM-Solver-fast for fast sampling of DPMs. Given a fixed number of function evaluations by `steps`,
|
364 |
+
the sampling procedure by DPM-Solver-fast is:
|
365 |
+
- Denote K = (steps // 3 + 1). We take K intermediate time steps for sampling.
|
366 |
+
- If steps % 3 == 0, we use (K - 2) steps of DPM-Solver-3, and 1 step of DPM-Solver-2 and 1 step of DPM-Solver-1.
|
367 |
+
- If steps % 3 == 1, we use (K - 1) steps of DPM-Solver-3 and 1 step of DPM-Solver-1.
|
368 |
+
- If steps % 3 == 2, we use (K - 1) steps of DPM-Solver-3 and 1 step of DPM-Solver-2.
|
369 |
+
|
370 |
+
============================================
|
371 |
+
Args:
|
372 |
+
t_T: A `float`. The starting time of the sampling (default is T).
|
373 |
+
t_0: A `float`. The ending time of the sampling (default is epsilon).
|
374 |
+
steps: A `int`. The total number of function evaluations (NFE).
|
375 |
+
device: A torch device.
|
376 |
+
Returns:
|
377 |
+
orders: A list of the solver order of each step.
|
378 |
+
timesteps: A pytorch tensor of the time steps, with the shape of (K + 1,).
|
379 |
+
"""
|
380 |
+
if order == 3:
|
381 |
+
K = steps // 3 + 1
|
382 |
+
if steps % 3 == 0:
|
383 |
+
orders = [3,] * (K - 2) + [2, 1]
|
384 |
+
elif steps % 3 == 1:
|
385 |
+
orders = [3,] * (K - 1) + [1]
|
386 |
+
else:
|
387 |
+
orders = [3,] * (K - 1) + [2]
|
388 |
+
timesteps = self.get_time_steps(skip_type, t_T, t_0, K, device)
|
389 |
+
return orders, timesteps
|
390 |
+
elif order == 2:
|
391 |
+
K = steps // 2
|
392 |
+
if steps % 2 == 0:
|
393 |
+
orders = [2,] * K
|
394 |
+
else:
|
395 |
+
orders = [2,] * K + [1]
|
396 |
+
timesteps = self.get_time_steps(skip_type, t_T, t_0, K, device)
|
397 |
+
return orders, timesteps
|
398 |
+
else:
|
399 |
+
raise ValueError("order must >= 2")
|
400 |
+
|
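A standalone illustration of the order-splitting rule documented above for the third-order fast solver; `split_orders` is a helper written here only to make the rule concrete:

```python
def split_orders(steps):
    # Mirrors the order == 3 branch of get_time_steps_for_dpm_solver_fast.
    K = steps // 3 + 1
    if steps % 3 == 0:
        return [3] * (K - 2) + [2, 1]
    elif steps % 3 == 1:
        return [3] * (K - 1) + [1]
    else:
        return [3] * (K - 1) + [2]

print(split_orders(10))  # [3, 3, 3, 1]     -> 10 model evaluations in total
print(split_orders(12))  # [3, 3, 3, 2, 1]  -> 12 model evaluations in total
```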
401 |
+
def denoise_fn(self, x, s, noise_s=None):
|
402 |
+
ns = self.noise_schedule
|
403 |
+
dims = len(x.shape) - 1
|
404 |
+
log_alpha_s = ns.marginal_log_mean_coeff(s)
|
405 |
+
sigma_s = ns.marginal_std(s)
|
406 |
+
|
407 |
+
if noise_s is None:
|
408 |
+
noise_s = self.model_fn(x, s)
|
409 |
+
x_0 = (
|
410 |
+
(x - sigma_s[(...,) + (None,)*dims] * noise_s) / torch.exp(log_alpha_s)[(...,) + (None,)*dims]
|
411 |
+
)
|
412 |
+
return x_0
|
413 |
+
|
414 |
+
def dpm_solver_first_update(self, x, s, t, noise_s=None, return_noise=False):
|
415 |
+
"""
|
416 |
+
A single step for DPM-Solver-1.
|
417 |
+
|
418 |
+
Args:
|
419 |
+
x: A pytorch tensor. The initial value at time `s`.
|
420 |
+
s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
|
421 |
+
t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
|
422 |
+
return_noise: A `bool`. If true, also return the predicted noise at time `s`.
|
423 |
+
Returns:
|
424 |
+
x_t: A pytorch tensor. The approximated solution at time `t`.
|
425 |
+
"""
|
426 |
+
ns = self.noise_schedule
|
427 |
+
dims = len(x.shape) - 1
|
428 |
+
lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
|
429 |
+
h = lambda_t - lambda_s
|
430 |
+
log_alpha_s, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(t)
|
431 |
+
sigma_s, sigma_t = ns.marginal_std(s), ns.marginal_std(t)
|
432 |
+
alpha_t = torch.exp(log_alpha_t)
|
433 |
+
|
434 |
+
if self.predict_x0:
|
435 |
+
phi_1 = (torch.exp(-h) - 1.) / (-1.)
|
436 |
+
if noise_s is None:
|
437 |
+
noise_s = self.model_fn(x, s)
|
438 |
+
x_t = (
|
439 |
+
(sigma_t / sigma_s)[(...,) + (None,)*dims] * x
|
440 |
+
+ (alpha_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
441 |
+
)
|
442 |
+
if return_noise:
|
443 |
+
return x_t, {'noise_s': noise_s}
|
444 |
+
else:
|
445 |
+
return x_t
|
446 |
+
else:
|
447 |
+
phi_1 = torch.expm1(h)
|
448 |
+
if noise_s is None:
|
449 |
+
noise_s = self.model_fn(x, s)
|
450 |
+
x_t = (
|
451 |
+
torch.exp(log_alpha_t - log_alpha_s)[(...,) + (None,)*dims] * x
|
452 |
+
- (sigma_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
453 |
+
)
|
454 |
+
if return_noise:
|
455 |
+
return x_t, {'noise_s': noise_s}
|
456 |
+
else:
|
457 |
+
return x_t
|
458 |
+
|
459 |
+
def dpm_solver_second_update(self, x, s, t, r1=0.5, noise_s=None, return_noise=False, solver_type='dpm_solver'):
|
460 |
+
"""
|
461 |
+
A single step for DPM-Solver-2.
|
462 |
+
|
463 |
+
Args:
|
464 |
+
x: A pytorch tensor. The initial value at time `s`.
|
465 |
+
s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
|
466 |
+
t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
|
467 |
+
r1: A `float`. The hyperparameter of the second-order solver. We recommend the default setting `0.5`.
|
468 |
+
noise_s: A pytorch tensor. The predicted noise at time `s`.
|
469 |
+
If `noise_s` is None, we compute the predicted noise by `x` and `s`; otherwise we directly use it.
|
470 |
+
return_noise: A `bool`. If true, also return the predicted noise at time `s` and `s1` (the intermediate time).
|
471 |
+
Returns:
|
472 |
+
x_t: A pytorch tensor. The approximated solution at time `t`.
|
473 |
+
"""
|
474 |
+
if r1 is None:
|
475 |
+
r1 = 0.5
|
476 |
+
ns = self.noise_schedule
|
477 |
+
dims = len(x.shape) - 1
|
478 |
+
lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
|
479 |
+
h = lambda_t - lambda_s
|
480 |
+
lambda_s1 = lambda_s + r1 * h
|
481 |
+
s1 = ns.inverse_lambda(lambda_s1)
|
482 |
+
log_alpha_s, log_alpha_s1, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(s1), ns.marginal_log_mean_coeff(t)
|
483 |
+
sigma_s, sigma_s1, sigma_t = ns.marginal_std(s), ns.marginal_std(s1), ns.marginal_std(t)
|
484 |
+
alpha_s1, alpha_t = torch.exp(log_alpha_s1), torch.exp(log_alpha_t)
|
485 |
+
|
486 |
+
if self.predict_x0:
|
487 |
+
phi_11 = torch.expm1(-r1 * h)
|
488 |
+
phi_1 = torch.expm1(-h)
|
489 |
+
|
490 |
+
if noise_s is None:
|
491 |
+
noise_s = self.model_fn(x, s)
|
492 |
+
x_s1 = (
|
493 |
+
(sigma_s1 / sigma_s)[(...,) + (None,)*dims] * x
|
494 |
+
- (alpha_s1 * phi_11)[(...,) + (None,)*dims] * noise_s
|
495 |
+
)
|
496 |
+
noise_s1 = self.model_fn(x_s1, s1)
|
497 |
+
if solver_type == 'dpm_solver':
|
498 |
+
x_t = (
|
499 |
+
(sigma_t / sigma_s)[(...,) + (None,)*dims] * x
|
500 |
+
- (alpha_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
501 |
+
- (0.5 / r1) * (alpha_t * phi_1)[(...,) + (None,)*dims] * (noise_s1 - noise_s)
|
502 |
+
)
|
503 |
+
elif solver_type == 'taylor':
|
504 |
+
x_t = (
|
505 |
+
(sigma_t / sigma_s)[(...,) + (None,)*dims] * x
|
506 |
+
- (alpha_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
507 |
+
+ (1. / r1) * (alpha_t * ((torch.exp(-h) - 1.) / h + 1.))[(...,) + (None,)*dims] * (noise_s1 - noise_s)
|
508 |
+
)
|
509 |
+
else:
|
510 |
+
raise ValueError("solver_type must be either dpm_solver or taylor, got {}".format(solver_type))
|
511 |
+
else:
|
512 |
+
phi_11 = torch.expm1(r1 * h)
|
513 |
+
phi_1 = torch.expm1(h)
|
514 |
+
|
515 |
+
if noise_s is None:
|
516 |
+
noise_s = self.model_fn(x, s)
|
517 |
+
x_s1 = (
|
518 |
+
torch.exp(log_alpha_s1 - log_alpha_s)[(...,) + (None,)*dims] * x
|
519 |
+
- (sigma_s1 * phi_11)[(...,) + (None,)*dims] * noise_s
|
520 |
+
)
|
521 |
+
noise_s1 = self.model_fn(x_s1, s1)
|
522 |
+
if solver_type == 'dpm_solver':
|
523 |
+
x_t = (
|
524 |
+
torch.exp(log_alpha_t - log_alpha_s)[(...,) + (None,)*dims] * x
|
525 |
+
- (sigma_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
526 |
+
- (0.5 / r1) * (sigma_t * phi_1)[(...,) + (None,)*dims] * (noise_s1 - noise_s)
|
527 |
+
)
|
528 |
+
elif solver_type == 'taylor':
|
529 |
+
x_t = (
|
530 |
+
torch.exp(log_alpha_t - log_alpha_s)[(...,) + (None,)*dims] * x
|
531 |
+
- (sigma_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
532 |
+
- (1. / r1) * (sigma_t * ((torch.exp(h) - 1.) / h - 1.))[(...,) + (None,)*dims] * (noise_s1 - noise_s)
|
533 |
+
)
|
534 |
+
else:
|
535 |
+
raise ValueError("solver_type must be either dpm_solver or taylor, got {}".format(solver_type))
|
536 |
+
if return_noise:
|
537 |
+
return x_t, {'noise_s': noise_s, 'noise_s1': noise_s1}
|
538 |
+
else:
|
539 |
+
return x_t
|
540 |
+
|
541 |
+
|
542 |
+
def dpm_multistep_second_update(self, x, noise_prev_list, t_prev_list, t, solver_type="dpm_solver"):
|
543 |
+
ns = self.noise_schedule
|
544 |
+
dims = len(x.shape) - 1
|
545 |
+
noise_prev_1, noise_prev_0 = noise_prev_list
|
546 |
+
t_prev_1, t_prev_0 = t_prev_list
|
547 |
+
lambda_prev_1, lambda_prev_0, lambda_t = ns.marginal_lambda(t_prev_1), ns.marginal_lambda(t_prev_0), ns.marginal_lambda(t)
|
548 |
+
log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t)
|
549 |
+
sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t)
|
550 |
+
alpha_t = torch.exp(log_alpha_t)
|
551 |
+
|
552 |
+
h_0 = lambda_prev_0 - lambda_prev_1
|
553 |
+
h = lambda_t - lambda_prev_0
|
554 |
+
r0 = h_0 / h
|
555 |
+
D1_0 = (1. / r0)[(...,) + (None,)*dims] * (noise_prev_0 - noise_prev_1)
|
556 |
+
if self.predict_x0:
|
557 |
+
if solver_type == 'taylor':
|
558 |
+
x_t = (
|
559 |
+
(sigma_t / sigma_prev_0)[(...,) + (None,)*dims] * x
|
560 |
+
- (alpha_t * (torch.exp(-h) - 1.))[(...,) + (None,)*dims] * noise_prev_0
|
561 |
+
+ (alpha_t * ((torch.exp(-h) - 1.) / h + 1.))[(...,) + (None,)*dims] * D1_0
|
562 |
+
)
|
563 |
+
elif solver_type == 'dpm_solver':
|
564 |
+
x_t = (
|
565 |
+
(sigma_t / sigma_prev_0)[(...,) + (None,)*dims] * x
|
566 |
+
- (alpha_t * (torch.exp(-h) - 1.))[(...,) + (None,)*dims] * noise_prev_0
|
567 |
+
- 0.5 * (alpha_t * (torch.exp(-h) - 1.))[(...,) + (None,)*dims] * D1_0
|
568 |
+
)
|
569 |
+
else:
|
570 |
+
if solver_type == 'taylor':
|
571 |
+
x_t = (
|
572 |
+
torch.exp(log_alpha_t - log_alpha_prev_0)[(...,) + (None,)*dims] * x
|
573 |
+
- (sigma_t * (torch.exp(h) - 1.))[(...,) + (None,)*dims] * noise_prev_0
|
574 |
+
- (sigma_t * ((torch.exp(h) - 1.) / h - 1.))[(...,) + (None,)*dims] * D1_0
|
575 |
+
)
|
576 |
+
elif solver_type == 'dpm_solver':
|
577 |
+
x_t = (
|
578 |
+
torch.exp(log_alpha_t - log_alpha_prev_0)[(...,) + (None,)*dims] * x
|
579 |
+
- (sigma_t * (torch.exp(h) - 1.))[(...,) + (None,)*dims] * noise_prev_0
|
580 |
+
- 0.5 * (sigma_t * (torch.exp(h) - 1.))[(...,) + (None,)*dims] * D1_0
|
581 |
+
)
|
582 |
+
return x_t
|
583 |
+
|
584 |
+
|
585 |
+
def dpm_multistep_third_update(self, x, noise_prev_list, t_prev_list, t, solver_type='dpm_solver'):
|
586 |
+
ns = self.noise_schedule
|
587 |
+
dims = len(x.shape) - 1
|
588 |
+
noise_prev_2, noise_prev_1, noise_prev_0 = noise_prev_list
|
589 |
+
t_prev_2, t_prev_1, t_prev_0 = t_prev_list
|
590 |
+
lambda_prev_2, lambda_prev_1, lambda_prev_0, lambda_t = ns.marginal_lambda(t_prev_2), ns.marginal_lambda(t_prev_1), ns.marginal_lambda(t_prev_0), ns.marginal_lambda(t)
|
591 |
+
log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t)
|
592 |
+
sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t)
|
593 |
+
alpha_t = torch.exp(log_alpha_t)
|
594 |
+
|
595 |
+
h_1 = lambda_prev_1 - lambda_prev_2
|
596 |
+
h_0 = lambda_prev_0 - lambda_prev_1
|
597 |
+
h = lambda_t - lambda_prev_0
|
598 |
+
r0, r1 = h_0 / h, h_1 / h
|
599 |
+
D1_0 = (1. / r0)[(...,) + (None,)*dims] * (noise_prev_0 - noise_prev_1)
|
600 |
+
D1_1 = (1. / r1)[(...,) + (None,)*dims] * (noise_prev_1 - noise_prev_2)
|
601 |
+
D1 = D1_0 + (r0 / (r0 + r1))[(...,) + (None,)*dims] * (D1_0 - D1_1)
|
602 |
+
D2 = (1. / (r0 + r1))[(...,) + (None,)*dims] * (D1_0 - D1_1)
|
603 |
+
if self.predict_x0:
|
604 |
+
x_t = (
|
605 |
+
(sigma_t / sigma_prev_0)[(...,) + (None,)*dims] * x
|
606 |
+
- (alpha_t * (torch.exp(-h) - 1.))[(...,) + (None,)*dims] * noise_prev_0
|
607 |
+
+ (alpha_t * ((torch.exp(-h) - 1.) / h + 1.))[(...,) + (None,)*dims] * D1
|
608 |
+
- (alpha_t * ((torch.exp(-h) - 1. + h) / h**2 - 0.5))[(...,) + (None,)*dims] * D2
|
609 |
+
)
|
610 |
+
else:
|
611 |
+
x_t = (
|
612 |
+
torch.exp(log_alpha_t - log_alpha_prev_0)[(...,) + (None,)*dims] * x
|
613 |
+
- (sigma_t * (torch.exp(h) - 1.))[(...,) + (None,)*dims] * noise_prev_0
|
614 |
+
- (sigma_t * ((torch.exp(h) - 1.) / h - 1.))[(...,) + (None,)*dims] * D1
|
615 |
+
- (sigma_t * ((torch.exp(h) - 1. - h) / h**2 - 0.5))[(...,) + (None,)*dims] * D2
|
616 |
+
)
|
617 |
+
return x_t
|
618 |
+
|
619 |
+
def dpm_solver_third_update(self, x, s, t, r1=1./3., r2=2./3., noise_s=None, noise_s1=None, noise_s2=None, return_noise=False, solver_type='dpm_solver'):
|
620 |
+
"""
|
621 |
+
A single step for DPM-Solver-3.
|
622 |
+
|
623 |
+
Args:
|
624 |
+
x: A pytorch tensor. The initial value at time `s`.
|
625 |
+
s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
|
626 |
+
t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
|
627 |
+
r1: A `float`. The hyperparameter of the third-order solver. We recommend the default setting `1 / 3`.
|
628 |
+
r2: A `float`. The hyperparameter of the third-order solver. We recommend the default setting `2 / 3`.
|
629 |
+
noise_s: A pytorch tensor. The predicted noise at time `s`.
|
630 |
+
If `noise_s` is None, we compute the predicted noise by `x` and `s`; otherwise we directly use it.
|
631 |
+
noise_s1: A pytorch tensor. The predicted noise at time `s1` (the intermediate time given by `r1`).
|
632 |
+
If `noise_s1` is None, we compute the predicted noise by `s1`; otherwise we directly use it.
|
633 |
+
Returns:
|
634 |
+
x_t: A pytorch tensor. The approximated solution at time `t`.
|
635 |
+
"""
|
636 |
+
if r1 is None:
|
637 |
+
r1 = 1. / 3.
|
638 |
+
if r2 is None:
|
639 |
+
r2 = 2. / 3.
|
640 |
+
ns = self.noise_schedule
|
641 |
+
dims = len(x.shape) - 1
|
642 |
+
lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
|
643 |
+
h = lambda_t - lambda_s
|
644 |
+
lambda_s1 = lambda_s + r1 * h
|
645 |
+
lambda_s2 = lambda_s + r2 * h
|
646 |
+
s1 = ns.inverse_lambda(lambda_s1)
|
647 |
+
s2 = ns.inverse_lambda(lambda_s2)
|
648 |
+
log_alpha_s, log_alpha_s1, log_alpha_s2, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(s1), ns.marginal_log_mean_coeff(s2), ns.marginal_log_mean_coeff(t)
|
649 |
+
sigma_s, sigma_s1, sigma_s2, sigma_t = ns.marginal_std(s), ns.marginal_std(s1), ns.marginal_std(s2), ns.marginal_std(t)
|
650 |
+
alpha_s1, alpha_s2, alpha_t = torch.exp(log_alpha_s1), torch.exp(log_alpha_s2), torch.exp(log_alpha_t)
|
651 |
+
|
652 |
+
if self.predict_x0:
|
653 |
+
phi_11 = torch.expm1(-r1 * h)
|
654 |
+
phi_12 = torch.expm1(-r2 * h)
|
655 |
+
phi_1 = torch.expm1(-h)
|
656 |
+
phi_22 = torch.expm1(-r2 * h) / (r2 * h) + 1.
|
657 |
+
phi_2 = phi_1 / h + 1.
|
658 |
+
phi_3 = phi_2 / h - 0.5
|
659 |
+
|
660 |
+
if noise_s is None:
|
661 |
+
noise_s = self.model_fn(x, s)
|
662 |
+
if noise_s1 is None:
|
663 |
+
x_s1 = (
|
664 |
+
(sigma_s1 / sigma_s)[(...,) + (None,)*dims] * x
|
665 |
+
- (alpha_s1 * phi_11)[(...,) + (None,)*dims] * noise_s
|
666 |
+
)
|
667 |
+
noise_s1 = self.model_fn(x_s1, s1)
|
668 |
+
if noise_s2 is None:
|
669 |
+
x_s2 = (
|
670 |
+
(sigma_s2 / sigma_s)[(...,) + (None,)*dims] * x
|
671 |
+
- (alpha_s2 * phi_12)[(...,) + (None,)*dims] * noise_s
|
672 |
+
+ r2 / r1 * (alpha_s2 * phi_22)[(...,) + (None,)*dims] * (noise_s1 - noise_s)
|
673 |
+
)
|
674 |
+
noise_s2 = self.model_fn(x_s2, s2)
|
675 |
+
if solver_type == 'dpm_solver':
|
676 |
+
x_t = (
|
677 |
+
(sigma_t / sigma_s)[(...,) + (None,)*dims] * x
|
678 |
+
- (alpha_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
679 |
+
+ (1. / r2) * (alpha_t * phi_2)[(...,) + (None,)*dims] * (noise_s2 - noise_s)
|
680 |
+
)
|
681 |
+
elif solver_type == 'taylor':
|
682 |
+
D1_0 = (1. / r1) * (noise_s1 - noise_s)
|
683 |
+
D1_1 = (1. / r2) * (noise_s2 - noise_s)
|
684 |
+
D1 = (r2 * D1_0 - r1 * D1_1) / (r2 - r1)
|
685 |
+
D2 = 2. * (D1_1 - D1_0) / (r2 - r1)
|
686 |
+
x_t = (
|
687 |
+
(sigma_t / sigma_s)[(...,) + (None,)*dims] * x
|
688 |
+
- (alpha_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
689 |
+
+ (alpha_t * phi_2)[(...,) + (None,)*dims] * D1
|
690 |
+
- (alpha_t * phi_3)[(...,) + (None,)*dims] * D2
|
691 |
+
)
|
692 |
+
else:
|
693 |
+
raise ValueError("solver_type must be either dpm_solver or dpm_solver++, got {}".format(solver_type))
|
694 |
+
else:
|
695 |
+
phi_11 = torch.expm1(r1 * h)
|
696 |
+
phi_12 = torch.expm1(r2 * h)
|
697 |
+
phi_1 = torch.expm1(h)
|
698 |
+
phi_22 = torch.expm1(r2 * h) / (r2 * h) - 1.
|
699 |
+
phi_2 = phi_1 / h - 1.
|
700 |
+
phi_3 = phi_2 / h - 0.5
|
701 |
+
|
702 |
+
if noise_s is None:
|
703 |
+
noise_s = self.model_fn(x, s)
|
704 |
+
if noise_s1 is None:
|
705 |
+
x_s1 = (
|
706 |
+
torch.exp(log_alpha_s1 - log_alpha_s)[(...,) + (None,)*dims] * x
|
707 |
+
- (sigma_s1 * phi_11)[(...,) + (None,)*dims] * noise_s
|
708 |
+
)
|
709 |
+
noise_s1 = self.model_fn(x_s1, s1)
|
710 |
+
if noise_s2 is None:
|
711 |
+
x_s2 = (
|
712 |
+
torch.exp(log_alpha_s2 - log_alpha_s)[(...,) + (None,)*dims] * x
|
713 |
+
- (sigma_s2 * phi_12)[(...,) + (None,)*dims] * noise_s
|
714 |
+
- r2 / r1 * (sigma_s2 * phi_22)[(...,) + (None,)*dims] * (noise_s1 - noise_s)
|
715 |
+
)
|
716 |
+
noise_s2 = self.model_fn(x_s2, s2)
|
717 |
+
if solver_type == 'dpm_solver':
|
718 |
+
x_t = (
|
719 |
+
torch.exp(log_alpha_t - log_alpha_s)[(...,) + (None,)*dims] * x
|
720 |
+
- (sigma_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
721 |
+
- (1. / r2) * (sigma_t * phi_2)[(...,) + (None,)*dims] * (noise_s2 - noise_s)
|
722 |
+
)
|
723 |
+
elif solver_type == 'taylor':
|
724 |
+
D1_0 = (1. / r1) * (noise_s1 - noise_s)
|
725 |
+
D1_1 = (1. / r2) * (noise_s2 - noise_s)
|
726 |
+
D1 = (r2 * D1_0 - r1 * D1_1) / (r2 - r1)
|
727 |
+
D2 = 2. * (D1_1 - D1_0) / (r2 - r1)
|
728 |
+
x_t = (
|
729 |
+
torch.exp(log_alpha_t - log_alpha_s)[(...,) + (None,)*dims] * x
|
730 |
+
- (sigma_t * phi_1)[(...,) + (None,)*dims] * noise_s
|
731 |
+
- (sigma_t * phi_2)[(...,) + (None,)*dims] * D1
|
732 |
+
- (sigma_t * phi_3)[(...,) + (None,)*dims] * D2
|
733 |
+
)
|
734 |
+
else:
|
735 |
+
raise ValueError("solver_type must be either dpm_solver or dpm_solver++, got {}".format(solver_type))
|
736 |
+
|
737 |
+
if return_noise:
|
738 |
+
return x_t, {'noise_s': noise_s, 'noise_s1': noise_s1, 'noise_s2': noise_s2}
|
739 |
+
else:
|
740 |
+
return x_t
|
741 |
+
|
742 |
+
def dpm_solver_update(self, x, s, t, order, return_noise=False, solver_type='dpm_solver', r1=None, r2=None):
|
743 |
+
"""
|
744 |
+
A single step for DPM-Solver of the given order `order`.
|
745 |
+
|
746 |
+
Args:
|
747 |
+
x: A pytorch tensor. The initial value at time `s`.
|
748 |
+
s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
|
749 |
+
t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
|
750 |
+
order: A `int`. The order of DPM-Solver. We only support order == 1 or 2 or 3.
|
751 |
+
Returns:
|
752 |
+
x_t: A pytorch tensor. The approximated solution at time `t`.
|
753 |
+
"""
|
754 |
+
if order == 1:
|
755 |
+
return self.dpm_solver_first_update(x, s, t, return_noise=return_noise)
|
756 |
+
elif order == 2:
|
757 |
+
return self.dpm_solver_second_update(x, s, t, return_noise=return_noise, solver_type=solver_type, r1=r1)
|
758 |
+
elif order == 3:
|
759 |
+
return self.dpm_solver_third_update(x, s, t, return_noise=return_noise, solver_type=solver_type, r1=r1, r2=r2)
|
760 |
+
else:
|
761 |
+
raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order))
|
762 |
+
|
763 |
+
def dpm_multistep_update(self, x, noise_prev_list, t_prev_list, t, order, solver_type='taylor'):
|
764 |
+
"""
|
765 |
+
A single step for DPM-Solver of the given order `order`.
|
766 |
+
|
767 |
+
Args:
|
768 |
+
x: A pytorch tensor. The initial value at time `s`.
|
769 |
+
s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
|
770 |
+
t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
|
771 |
+
order: A `int`. The order of DPM-Solver. We only support order == 1 or 2 or 3.
|
772 |
+
Returns:
|
773 |
+
x_t: A pytorch tensor. The approximated solution at time `t`.
|
774 |
+
"""
|
775 |
+
if order == 1:
|
776 |
+
return self.dpm_solver_first_update(x, t_prev_list[-1], t, noise_s=noise_prev_list[-1])
|
777 |
+
elif order == 2:
|
778 |
+
return self.dpm_multistep_second_update(x, noise_prev_list, t_prev_list, t, solver_type=solver_type)
|
779 |
+
elif order == 3:
|
780 |
+
return self.dpm_multistep_third_update(x, noise_prev_list, t_prev_list, t, solver_type=solver_type)
|
781 |
+
else:
|
782 |
+
raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order))
|
783 |
+
|
784 |
+
def dpm_solver_adaptive(self, x, order, t_T, t_0, h_init=0.05, atol=0.0078, rtol=0.05, theta=0.9, t_err=1e-5, solver_type='dpm_solver'):
|
785 |
+
"""
|
786 |
+
The adaptive step size solver based on DPM-Solver.
|
787 |
+
|
788 |
+
Args:
|
789 |
+
x: A pytorch tensor. The initial value at time `t_T`.
|
790 |
+
order: A `int`. The (higher) order of the solver. We only support order == 2 or 3.
|
791 |
+
t_T: A `float`. The starting time of the sampling (default is T).
|
792 |
+
t_0: A `float`. The ending time of the sampling (default is epsilon).
|
793 |
+
h_init: A `float`. The initial step size (for logSNR).
|
794 |
+
atol: A `float`. The absolute tolerance of the solver. For image data, the default setting is 0.0078, following [1].
|
795 |
+
rtol: A `float`. The relative tolerance of the solver. The default setting is 0.05.
|
796 |
+
theta: A `float`. The safety hyperparameter for adapting the step size. The default setting is 0.9, following [1].
|
797 |
+
t_err: A `float`. The tolerance for the time. We solve the diffusion ODE until the absolute error between the
|
798 |
+
current time and `t_0` is less than `t_err`. The default setting is 1e-5.
|
799 |
+
Returns:
|
800 |
+
x_0: A pytorch tensor. The approximated solution at time `t_0`.
|
801 |
+
|
802 |
+
[1] A. Jolicoeur-Martineau, K. Li, R. Piché-Taillefer, T. Kachman, and I. Mitliagkas, "Gotta go fast when generating data with score-based models," arXiv preprint arXiv:2105.14080, 2021.
|
803 |
+
"""
|
804 |
+
ns = self.noise_schedule
|
805 |
+
s = t_T * torch.ones((x.shape[0],)).to(x)
|
806 |
+
lambda_s = ns.marginal_lambda(s)
|
807 |
+
lambda_0 = ns.marginal_lambda(t_0 * torch.ones_like(s).to(x))
|
808 |
+
h = h_init * torch.ones_like(s).to(x)
|
809 |
+
x_prev = x
|
810 |
+
nfe = 0
|
811 |
+
if order == 2:
|
812 |
+
r1 = 0.5
|
813 |
+
lower_update = lambda x, s, t: self.dpm_solver_first_update(x, s, t, return_noise=True)
|
814 |
+
higher_update = lambda x, s, t, **kwargs: self.dpm_solver_second_update(x, s, t, r1=r1, solver_type=solver_type, **kwargs)
|
815 |
+
elif order == 3:
|
816 |
+
r1, r2 = 1. / 3., 2. / 3.
|
817 |
+
lower_update = lambda x, s, t: self.dpm_solver_second_update(x, s, t, r1=r1, return_noise=True, solver_type=solver_type)
|
818 |
+
higher_update = lambda x, s, t, **kwargs: self.dpm_solver_third_update(x, s, t, r1=r1, r2=r2, solver_type=solver_type, **kwargs)
|
819 |
+
else:
|
820 |
+
raise ValueError("For adaptive step size solver, order must be 2 or 3, got {}".format(order))
|
821 |
+
while torch.abs((s - t_0)).mean() > t_err:
|
822 |
+
t = ns.inverse_lambda(lambda_s + h)
|
823 |
+
x_lower, lower_noise_kwargs = lower_update(x, s, t)
|
824 |
+
x_higher = higher_update(x, s, t, **lower_noise_kwargs)
|
825 |
+
delta = torch.max(torch.ones_like(x).to(x) * atol, rtol * torch.max(torch.abs(x_lower), torch.abs(x_prev)))
|
826 |
+
norm_fn = lambda v: torch.sqrt(torch.square(v.reshape((v.shape[0], -1))).mean(dim=-1, keepdim=True))
|
827 |
+
E = norm_fn((x_higher - x_lower) / delta).max()
|
828 |
+
if torch.all(E <= 1.):
|
829 |
+
x = x_higher
|
830 |
+
s = t
|
831 |
+
x_prev = x_lower
|
832 |
+
lambda_s = ns.marginal_lambda(s)
|
833 |
+
h = torch.min(theta * h * torch.float_power(E, -1. / order).float(), lambda_0 - lambda_s)
|
834 |
+
nfe += order
|
835 |
+
print('adaptive solver nfe', nfe)
|
836 |
+
return x
|
837 |
+
|
838 |
+
def sample(self, x, steps=10, eps=1e-4, T=None, order=3, skip_type='time_uniform',
|
839 |
+
denoise=False, method='fast', solver_type='dpm_solver', atol=0.0078,
|
840 |
+
rtol=0.05, timesteps=None,
|
841 |
+
):
|
842 |
+
"""
|
843 |
+
Compute the sample at time `eps` by DPM-Solver, given the initial `x` at time `T`.
|
844 |
+
|
845 |
+
We support the following algorithms:
|
846 |
+
|
847 |
+
- Adaptive step size DPM-Solver (i.e. DPM-Solver-12 and DPM-Solver-23)
|
848 |
+
|
849 |
+
- Fixed order DPM-Solver (i.e. DPM-Solver-1, DPM-Solver-2 and DPM-Solver-3).
|
850 |
+
|
851 |
+
- Fast version of DPM-Solver (i.e. DPM-Solver-fast), which uses uniform logSNR steps and combines
|
852 |
+
different orders of DPM-Solver.
|
853 |
+
|
854 |
+
**We recommend DPM-Solver-fast for both fast sampling in few steps (<=20) and fast convergence in many steps (50 to 100).**
|
855 |
+
|
856 |
+
Choosing the algorithms:
|
857 |
+
|
858 |
+
- If `method` is 'adaptive':
|
859 |
+
We ignore `steps` and use adaptive step size DPM-Solver with a higher order of `order`.
|
860 |
+
If `order`=2, we use DPM-Solver-12 which combines DPM-Solver-1 and DPM-Solver-2.
|
861 |
+
If `order`=3, we use DPM-Solver-23 which combines DPM-Solver-2 and DPM-Solver-3.
|
862 |
+
You can adjust the absolute tolerance `atol` and the relative tolerance `rtol` to balance the computation costs
|
863 |
+
(NFE) and the sample quality.
|
864 |
+
|
865 |
+
- If `method` is 'fast':
|
866 |
+
We ignore `order` and use DPM-Solver-fast with number of function evaluations (NFE) = `steps`.
|
867 |
+
We ignore `skip_type` and use uniform logSNR steps for DPM-Solver-fast.
|
868 |
+
Given a fixed NFE=`steps`, the sampling procedure by DPM-Solver-fast is:
|
869 |
+
- Denote K = (steps // 3 + 1). We take K intermediate time steps for sampling.
|
870 |
+
- If steps % 3 == 0, we use (K - 2) steps of DPM-Solver-3, and 1 step of DPM-Solver-2 and 1 step of DPM-Solver-1.
|
871 |
+
- If steps % 3 == 1, we use (K - 1) steps of DPM-Solver-3 and 1 step of DPM-Solver-1.
|
872 |
+
- If steps % 3 == 2, we use (K - 1) steps of DPM-Solver-3 and 1 step of DPM-Solver-2.
|
873 |
+
|
874 |
+
- If `method` is 'singlestep':
|
875 |
+
We use DPM-Solver-`order` for `order`=1 or 2 or 3, with total [`steps` // `order`] * `order` NFE.
|
876 |
+
We support three types of `skip_type`:
|
877 |
+
- 'logSNR': uniform logSNR for the time steps, **recommended for DPM-Solver**.
|
878 |
+
- 'time_uniform': uniform time for the time steps. (Used in DDIM and DDPM.)
|
879 |
+
- 'time_quadratic': quadratic time for the time steps. (Used in DDIM.)
|
880 |
+
|
881 |
+
=====================================================
|
882 |
+
Args:
|
883 |
+
x: A pytorch tensor. The initial value at time `T` (a sample from the normal distribution).
|
884 |
+
steps: An `int`. The total number of function evaluations (NFE).
|
885 |
+
eps: A `float`. The ending time of the sampling.
|
886 |
+
We recommend `eps`=1e-3 when `steps` <= 15; and `eps`=1e-4 when `steps` > 15.
|
887 |
+
T: A `float`. The starting time of the sampling. Default is `None`.
|
888 |
+
If `T` is None, we use self.noise_schedule.T.
|
889 |
+
order: An `int`. The order of DPM-Solver.
|
890 |
+
skip_type: A `str`. The type for the spacing of the time steps. Default is 'time_uniform'.
|
891 |
+
method: A `str`. The sampling method: 'adaptive', 'multistep', 'fast' (recommended) or 'singlestep'.
|
892 |
+
denoise: A `bool`. Whether to apply an extra denoising step at the final time `eps`.
|
893 |
+
atol: A `float`. The absolute tolerance of the adaptive step size solver.
|
894 |
+
rtol: A `float`. The relative tolerance of the adaptive step size solver.
|
895 |
+
Returns:
|
896 |
+
x_0: A pytorch tensor. The approximated solution at time `t_0`.
|
897 |
+
|
898 |
+
[1] A. Jolicoeur-Martineau, K. Li, R. Piché-Taillefer, T. Kachman, and I. Mitliagkas, "Gotta go fast when generating data with score-based models," arXiv preprint arXiv:2105.14080, 2021.
|
899 |
+
"""
|
900 |
+
t_0 = eps
|
901 |
+
t_T = self.noise_schedule.T if T is None else T
|
902 |
+
device = x.device
|
903 |
+
if method == 'adaptive':
|
904 |
+
with torch.no_grad():
|
905 |
+
x = self.dpm_solver_adaptive(x, order=order, t_T=t_T, t_0=t_0, atol=atol, rtol=rtol, solver_type=solver_type)
|
906 |
+
elif method == 'multistep':
|
907 |
+
assert steps >= order
|
908 |
+
if timesteps is None:
|
909 |
+
timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device)
|
910 |
+
assert timesteps.shape[0] - 1 == steps
|
911 |
+
with torch.no_grad():
|
912 |
+
vec_t = timesteps[0].expand((x.shape[0]))
|
913 |
+
noise_prev_list = [self.model_fn(x, vec_t)]
|
914 |
+
t_prev_list = [vec_t]
|
915 |
+
for init_order in range(1, order):
|
916 |
+
vec_t = timesteps[init_order].expand(x.shape[0])
|
917 |
+
x = self.dpm_multistep_update(x, noise_prev_list, t_prev_list, vec_t, init_order, solver_type=solver_type)
|
918 |
+
noise_prev_list.append(self.model_fn(x, vec_t))
|
919 |
+
t_prev_list.append(vec_t)
|
920 |
+
for step in range(order, steps + 1):
|
921 |
+
vec_t = timesteps[step].expand(x.shape[0])
|
922 |
+
x = self.dpm_multistep_update(x, noise_prev_list, t_prev_list, vec_t, order, solver_type=solver_type)
|
923 |
+
for i in range(order - 1):
|
924 |
+
t_prev_list[i] = t_prev_list[i + 1]
|
925 |
+
noise_prev_list[i] = noise_prev_list[i + 1]
|
926 |
+
t_prev_list[-1] = vec_t
|
927 |
+
if step < steps:
|
928 |
+
noise_prev_list[-1] = self.model_fn(x, vec_t)
|
929 |
+
elif method == 'fast':
|
930 |
+
orders, _ = self.get_time_steps_for_dpm_solver_fast(skip_type=skip_type, t_T=t_T, t_0=t_0, steps=steps, order=order, device=device)
|
931 |
+
timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device)
|
932 |
+
with torch.no_grad():
|
933 |
+
i = 0
|
934 |
+
for order in orders:
|
935 |
+
vec_s, vec_t = torch.ones((x.shape[0],)).to(device) * timesteps[i], torch.ones((x.shape[0],)).to(device) * timesteps[i + order]
|
936 |
+
h = self.noise_schedule.marginal_lambda(timesteps[i + order]) - self.noise_schedule.marginal_lambda(timesteps[i])
|
937 |
+
r1 = None if order <= 1 else (self.noise_schedule.marginal_lambda(timesteps[i + 1]) - self.noise_schedule.marginal_lambda(timesteps[i])) / h
|
938 |
+
r2 = None if order <= 2 else (self.noise_schedule.marginal_lambda(timesteps[i + 2]) - self.noise_schedule.marginal_lambda(timesteps[i])) / h
|
939 |
+
x = self.dpm_solver_update(x, vec_s, vec_t, order, solver_type=solver_type, r1=r1, r2=r2)
|
940 |
+
i += order
|
941 |
+
elif method == 'singlestep':
|
942 |
+
N_steps = steps // order
|
943 |
+
orders = [order,] * N_steps
|
944 |
+
timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=N_steps, device=device)
|
945 |
+
assert len(timesteps) - 1 == N_steps
|
946 |
+
with torch.no_grad():
|
947 |
+
for i, order in enumerate(orders):
|
948 |
+
vec_s, vec_t = torch.ones((x.shape[0],)).to(device) * timesteps[i], torch.ones((x.shape[0],)).to(device) * timesteps[i + 1]
|
949 |
+
x = self.dpm_solver_update(x, vec_s, vec_t, order, solver_type=solver_type)
|
950 |
+
if denoise:
|
951 |
+
x = self.denoise_fn(x, torch.ones((x.shape[0],)).to(device) * t_0)
|
952 |
+
return x
|
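A minimal usage sketch of the `sample()` entry point above. The `NoiseScheduleVP` and `DPM_Solver` constructors and the `model_fn(x, t)` interface are assumptions carried over from the upstream DPM-Solver code this file adapts; the noise predictor below is a placeholder, not the COP-GEN-Beta network, and the keyword arguments to `sample()` follow the signature shown in the diff.

import torch
from dpm_solver_pp import NoiseScheduleVP, DPM_Solver

# Hypothetical discrete beta schedule; the real schedule comes from the trained model.
betas = torch.linspace(1e-4, 2e-2, 1000)
ns = NoiseScheduleVP(schedule='discrete', betas=betas)

def model_fn(x, t_continuous):
    # Placeholder for the wrapped diffusion model: returns the predicted noise.
    return torch.zeros_like(x)

solver = DPM_Solver(model_fn, ns)
x_T = torch.randn(4, 4, 32, 32)  # latent noise at time T
x_0 = solver.sample(x_T, steps=50, eps=1e-4, method='fast', order=3, solver_type='dpm_solver')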
src/COP-GEN-Beta/encode_majortom_images.py
ADDED
@@ -0,0 +1,95 @@
|
1 |
+
import torch.nn as nn
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
from datasets import MajorTOMThumbnail
|
5 |
+
from torch.utils.data import DataLoader
|
6 |
+
from libs.autoencoder import get_model
|
7 |
+
import argparse
|
8 |
+
from tqdm import tqdm
|
9 |
+
import os
|
10 |
+
|
11 |
+
torch.manual_seed(0)
|
12 |
+
np.random.seed(0)
|
13 |
+
|
14 |
+
|
15 |
+
def get_existing_encoded_files(output_dir, image_paths):
|
16 |
+
"""Returns a set of filenames that already have their encoded features saved"""
|
17 |
+
existing_files = set()
|
18 |
+
missing_files = set()
|
19 |
+
|
20 |
+
for img_path in image_paths:
|
21 |
+
filename = os.path.basename(img_path).split('.')[0]
|
22 |
+
npy_path = os.path.join(output_dir, f'{filename}.npy')
|
23 |
+
|
24 |
+
if os.path.exists(npy_path):
|
25 |
+
existing_files.add(filename)
|
26 |
+
else:
|
27 |
+
missing_files.add(filename)
|
28 |
+
|
29 |
+
print(f"\nFound {len(existing_files)} already encoded files")
|
30 |
+
print(f"Missing {len(missing_files)} files to encode")
|
31 |
+
|
32 |
+
return existing_files, missing_files
|
33 |
+
|
34 |
+
def main(resolution=256):
|
35 |
+
parser = argparse.ArgumentParser()
|
36 |
+
parser.add_argument('--path', type=str)
|
37 |
+
parser.add_argument('--resolution', type=int, default=256)
|
38 |
+
parser.add_argument('--output_dir', type=str)
|
39 |
+
args = parser.parse_args()
|
40 |
+
|
41 |
+
# Create output directory if it doesn't exist
|
42 |
+
os.makedirs(args.output_dir, exist_ok=True)
|
43 |
+
|
44 |
+
datafactory = MajorTOMThumbnail(path=args.path, resolution=args.resolution)
|
45 |
+
dataset = datafactory.get_split(split=None, labeled=False, nosplit=True)
|
46 |
+
image_paths = dataset.image_paths
|
47 |
+
|
48 |
+
# Check for existing encoded files
|
49 |
+
# existing_files, missing_files = get_existing_encoded_files(args.output_dir, image_paths)
|
50 |
+
# TODO: Restart is not working yet
|
51 |
+
existing_files = set()
|
52 |
+
missing_files = set(image_paths)
|
53 |
+
|
54 |
+
if len(missing_files) == 0:
|
55 |
+
print("All files have already been encoded. Exiting...")
|
56 |
+
return
|
57 |
+
|
58 |
+
# Filter dataset to only process missing files
|
59 |
+
filtered_indices = [i for i, path in enumerate(image_paths)
|
60 |
+
if os.path.basename(path).split('.')[0] not in existing_files]
|
61 |
+
dataset.image_paths = [image_paths[i] for i in filtered_indices]
|
62 |
+
|
63 |
+
dataset_loader = DataLoader(dataset, batch_size=128, shuffle=False, drop_last=False,
|
64 |
+
num_workers=8, pin_memory=True, persistent_workers=True)
|
65 |
+
|
66 |
+
model = get_model('assets/stable-diffusion/autoencoder_kl.pth')
|
67 |
+
# model = nn.DataParallel(model)
|
68 |
+
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
69 |
+
model.to(device)
|
70 |
+
|
71 |
+
processed_count = 0
|
72 |
+
for img, filename in tqdm(dataset_loader, desc="Encoding images", unit="batch"):
|
73 |
+
img = img.to(device)
|
74 |
+
moments = model(img, fn='encode_moments')
|
75 |
+
moments = moments.detach().cpu().numpy()
|
76 |
+
|
77 |
+
for moment, fname in zip(moments, filename):
|
78 |
+
np.save(f'{args.output_dir}/{fname}.npy', moment)
|
79 |
+
processed_count += 1
|
80 |
+
|
81 |
+
print(f'\nProcessed {processed_count} new files')
|
82 |
+
print(f'Total encoded files: {len(existing_files) + processed_count}')
|
83 |
+
|
84 |
+
# features = []
|
85 |
+
# labels = []
|
86 |
+
# features = np.concatenate(features, axis=0)
|
87 |
+
# labels = np.concatenate(labels, axis=0)
|
88 |
+
# print(f'features.shape={features.shape}')
|
89 |
+
# print(f'labels.shape={labels.shape}')
|
90 |
+
# np.save(f'imagenet{resolution}_features.npy', features)
|
91 |
+
# np.save(f'imagenet{resolution}_labels.npy', labels)
|
92 |
+
|
93 |
+
|
94 |
+
if __name__ == "__main__":
|
95 |
+
main()
|
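The script above writes one `.npy` file per thumbnail containing the autoencoder "moments" (mean and log-variance of the latent). The sketch below shows how a downstream loader can turn such a file back into a scaled latent, mirroring `FrozenAutoencoderKL.sample()` in libs/autoencoder.py; the file path is a placeholder.

import numpy as np
import torch

moments = torch.from_numpy(np.load('out_features/example_tile.npy'))[None]  # (1, 8, H/8, W/8)
mean, logvar = torch.chunk(moments, 2, dim=1)
std = torch.exp(0.5 * torch.clamp(logvar, -30.0, 20.0))
z = 0.18215 * (mean + std * torch.randn_like(mean))  # latent as consumed by the diffusion model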
src/COP-GEN-Beta/libs/__init__.py
ADDED
@@ -0,0 +1 @@
|
1 |
+
# codes from third party
|
src/COP-GEN-Beta/libs/autoencoder.py
ADDED
@@ -0,0 +1,519 @@
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import numpy as np
|
4 |
+
from einops import rearrange
|
5 |
+
|
6 |
+
|
7 |
+
class LinearAttention(nn.Module):
|
8 |
+
def __init__(self, dim, heads=4, dim_head=32):
|
9 |
+
super().__init__()
|
10 |
+
self.heads = heads
|
11 |
+
hidden_dim = dim_head * heads
|
12 |
+
self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias = False)
|
13 |
+
self.to_out = nn.Conv2d(hidden_dim, dim, 1)
|
14 |
+
|
15 |
+
def forward(self, x):
|
16 |
+
b, c, h, w = x.shape
|
17 |
+
qkv = self.to_qkv(x)
|
18 |
+
q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads = self.heads, qkv=3)
|
19 |
+
k = k.softmax(dim=-1)
|
20 |
+
context = torch.einsum('bhdn,bhen->bhde', k, v)
|
21 |
+
out = torch.einsum('bhde,bhdn->bhen', context, q)
|
22 |
+
out = rearrange(out, 'b heads c (h w) -> b (heads c) h w', heads=self.heads, h=h, w=w)
|
23 |
+
return self.to_out(out)
|
24 |
+
|
25 |
+
|
26 |
+
def nonlinearity(x):
|
27 |
+
# swish
|
28 |
+
return x*torch.sigmoid(x)
|
29 |
+
|
30 |
+
|
31 |
+
def Normalize(in_channels, num_groups=32):
|
32 |
+
return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)
|
33 |
+
|
34 |
+
|
35 |
+
class Upsample(nn.Module):
|
36 |
+
def __init__(self, in_channels, with_conv):
|
37 |
+
super().__init__()
|
38 |
+
self.with_conv = with_conv
|
39 |
+
if self.with_conv:
|
40 |
+
self.conv = torch.nn.Conv2d(in_channels,
|
41 |
+
in_channels,
|
42 |
+
kernel_size=3,
|
43 |
+
stride=1,
|
44 |
+
padding=1)
|
45 |
+
|
46 |
+
def forward(self, x):
|
47 |
+
x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
|
48 |
+
if self.with_conv:
|
49 |
+
x = self.conv(x)
|
50 |
+
return x
|
51 |
+
|
52 |
+
|
53 |
+
class Downsample(nn.Module):
|
54 |
+
def __init__(self, in_channels, with_conv):
|
55 |
+
super().__init__()
|
56 |
+
self.with_conv = with_conv
|
57 |
+
if self.with_conv:
|
58 |
+
# no asymmetric padding in torch conv, must do it ourselves
|
59 |
+
self.conv = torch.nn.Conv2d(in_channels,
|
60 |
+
in_channels,
|
61 |
+
kernel_size=3,
|
62 |
+
stride=2,
|
63 |
+
padding=0)
|
64 |
+
|
65 |
+
def forward(self, x):
|
66 |
+
if self.with_conv:
|
67 |
+
pad = (0,1,0,1)
|
68 |
+
x = torch.nn.functional.pad(x, pad, mode="constant", value=0)
|
69 |
+
x = self.conv(x)
|
70 |
+
else:
|
71 |
+
x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
|
72 |
+
return x
|
73 |
+
|
74 |
+
|
75 |
+
class ResnetBlock(nn.Module):
|
76 |
+
def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False,
|
77 |
+
dropout, temb_channels=512):
|
78 |
+
super().__init__()
|
79 |
+
self.in_channels = in_channels
|
80 |
+
out_channels = in_channels if out_channels is None else out_channels
|
81 |
+
self.out_channels = out_channels
|
82 |
+
self.use_conv_shortcut = conv_shortcut
|
83 |
+
|
84 |
+
self.norm1 = Normalize(in_channels)
|
85 |
+
self.conv1 = torch.nn.Conv2d(in_channels,
|
86 |
+
out_channels,
|
87 |
+
kernel_size=3,
|
88 |
+
stride=1,
|
89 |
+
padding=1)
|
90 |
+
if temb_channels > 0:
|
91 |
+
self.temb_proj = torch.nn.Linear(temb_channels,
|
92 |
+
out_channels)
|
93 |
+
self.norm2 = Normalize(out_channels)
|
94 |
+
self.dropout = torch.nn.Dropout(dropout)
|
95 |
+
self.conv2 = torch.nn.Conv2d(out_channels,
|
96 |
+
out_channels,
|
97 |
+
kernel_size=3,
|
98 |
+
stride=1,
|
99 |
+
padding=1)
|
100 |
+
if self.in_channels != self.out_channels:
|
101 |
+
if self.use_conv_shortcut:
|
102 |
+
self.conv_shortcut = torch.nn.Conv2d(in_channels,
|
103 |
+
out_channels,
|
104 |
+
kernel_size=3,
|
105 |
+
stride=1,
|
106 |
+
padding=1)
|
107 |
+
else:
|
108 |
+
self.nin_shortcut = torch.nn.Conv2d(in_channels,
|
109 |
+
out_channels,
|
110 |
+
kernel_size=1,
|
111 |
+
stride=1,
|
112 |
+
padding=0)
|
113 |
+
|
114 |
+
def forward(self, x, temb):
|
115 |
+
h = x
|
116 |
+
h = self.norm1(h)
|
117 |
+
h = nonlinearity(h)
|
118 |
+
h = self.conv1(h)
|
119 |
+
|
120 |
+
if temb is not None:
|
121 |
+
h = h + self.temb_proj(nonlinearity(temb))[:,:,None,None]
|
122 |
+
|
123 |
+
h = self.norm2(h)
|
124 |
+
h = nonlinearity(h)
|
125 |
+
h = self.dropout(h)
|
126 |
+
h = self.conv2(h)
|
127 |
+
|
128 |
+
if self.in_channels != self.out_channels:
|
129 |
+
if self.use_conv_shortcut:
|
130 |
+
x = self.conv_shortcut(x)
|
131 |
+
else:
|
132 |
+
x = self.nin_shortcut(x)
|
133 |
+
|
134 |
+
return x+h
|
135 |
+
|
136 |
+
|
137 |
+
class LinAttnBlock(LinearAttention):
|
138 |
+
"""to match AttnBlock usage"""
|
139 |
+
def __init__(self, in_channels):
|
140 |
+
super().__init__(dim=in_channels, heads=1, dim_head=in_channels)
|
141 |
+
|
142 |
+
|
143 |
+
class AttnBlock(nn.Module):
|
144 |
+
def __init__(self, in_channels):
|
145 |
+
super().__init__()
|
146 |
+
self.in_channels = in_channels
|
147 |
+
|
148 |
+
self.norm = Normalize(in_channels)
|
149 |
+
self.q = torch.nn.Conv2d(in_channels,
|
150 |
+
in_channels,
|
151 |
+
kernel_size=1,
|
152 |
+
stride=1,
|
153 |
+
padding=0)
|
154 |
+
self.k = torch.nn.Conv2d(in_channels,
|
155 |
+
in_channels,
|
156 |
+
kernel_size=1,
|
157 |
+
stride=1,
|
158 |
+
padding=0)
|
159 |
+
self.v = torch.nn.Conv2d(in_channels,
|
160 |
+
in_channels,
|
161 |
+
kernel_size=1,
|
162 |
+
stride=1,
|
163 |
+
padding=0)
|
164 |
+
self.proj_out = torch.nn.Conv2d(in_channels,
|
165 |
+
in_channels,
|
166 |
+
kernel_size=1,
|
167 |
+
stride=1,
|
168 |
+
padding=0)
|
169 |
+
|
170 |
+
|
171 |
+
def forward(self, x):
|
172 |
+
h_ = x
|
173 |
+
h_ = self.norm(h_)
|
174 |
+
q = self.q(h_)
|
175 |
+
k = self.k(h_)
|
176 |
+
v = self.v(h_)
|
177 |
+
|
178 |
+
# compute attention
|
179 |
+
b,c,h,w = q.shape
|
180 |
+
q = q.reshape(b,c,h*w)
|
181 |
+
q = q.permute(0,2,1) # b,hw,c
|
182 |
+
k = k.reshape(b,c,h*w) # b,c,hw
|
183 |
+
w_ = torch.bmm(q,k) # b,hw,hw w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
|
184 |
+
w_ = w_ * (int(c)**(-0.5))
|
185 |
+
w_ = torch.nn.functional.softmax(w_, dim=2)
|
186 |
+
|
187 |
+
# attend to values
|
188 |
+
v = v.reshape(b,c,h*w)
|
189 |
+
w_ = w_.permute(0,2,1) # b,hw,hw (first hw of k, second of q)
|
190 |
+
h_ = torch.bmm(v,w_) # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j]
|
191 |
+
h_ = h_.reshape(b,c,h,w)
|
192 |
+
|
193 |
+
h_ = self.proj_out(h_)
|
194 |
+
|
195 |
+
return x+h_
|
196 |
+
|
197 |
+
|
198 |
+
def make_attn(in_channels, attn_type="vanilla"):
|
199 |
+
assert attn_type in ["vanilla", "linear", "none"], f'attn_type {attn_type} unknown'
|
200 |
+
print(f"making attention of type '{attn_type}' with {in_channels} in_channels")
|
201 |
+
if attn_type == "vanilla":
|
202 |
+
return AttnBlock(in_channels)
|
203 |
+
elif attn_type == "none":
|
204 |
+
return nn.Identity(in_channels)
|
205 |
+
else:
|
206 |
+
return LinAttnBlock(in_channels)
|
207 |
+
|
208 |
+
|
209 |
+
class Encoder(nn.Module):
|
210 |
+
def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks,
|
211 |
+
attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels,
|
212 |
+
resolution, z_channels, double_z=True, use_linear_attn=False, attn_type="vanilla",
|
213 |
+
**ignore_kwargs):
|
214 |
+
super().__init__()
|
215 |
+
if use_linear_attn: attn_type = "linear"
|
216 |
+
self.ch = ch
|
217 |
+
self.temb_ch = 0
|
218 |
+
self.num_resolutions = len(ch_mult)
|
219 |
+
self.num_res_blocks = num_res_blocks
|
220 |
+
self.resolution = resolution
|
221 |
+
self.in_channels = in_channels
|
222 |
+
|
223 |
+
# downsampling
|
224 |
+
self.conv_in = torch.nn.Conv2d(in_channels,
|
225 |
+
self.ch,
|
226 |
+
kernel_size=3,
|
227 |
+
stride=1,
|
228 |
+
padding=1)
|
229 |
+
|
230 |
+
curr_res = resolution
|
231 |
+
in_ch_mult = (1,)+tuple(ch_mult)
|
232 |
+
self.in_ch_mult = in_ch_mult
|
233 |
+
self.down = nn.ModuleList()
|
234 |
+
for i_level in range(self.num_resolutions):
|
235 |
+
block = nn.ModuleList()
|
236 |
+
attn = nn.ModuleList()
|
237 |
+
block_in = ch*in_ch_mult[i_level]
|
238 |
+
block_out = ch*ch_mult[i_level]
|
239 |
+
for i_block in range(self.num_res_blocks):
|
240 |
+
block.append(ResnetBlock(in_channels=block_in,
|
241 |
+
out_channels=block_out,
|
242 |
+
temb_channels=self.temb_ch,
|
243 |
+
dropout=dropout))
|
244 |
+
block_in = block_out
|
245 |
+
if curr_res in attn_resolutions:
|
246 |
+
attn.append(make_attn(block_in, attn_type=attn_type))
|
247 |
+
down = nn.Module()
|
248 |
+
down.block = block
|
249 |
+
down.attn = attn
|
250 |
+
if i_level != self.num_resolutions-1:
|
251 |
+
down.downsample = Downsample(block_in, resamp_with_conv)
|
252 |
+
curr_res = curr_res // 2
|
253 |
+
self.down.append(down)
|
254 |
+
|
255 |
+
# middle
|
256 |
+
self.mid = nn.Module()
|
257 |
+
self.mid.block_1 = ResnetBlock(in_channels=block_in,
|
258 |
+
out_channels=block_in,
|
259 |
+
temb_channels=self.temb_ch,
|
260 |
+
dropout=dropout)
|
261 |
+
self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
|
262 |
+
self.mid.block_2 = ResnetBlock(in_channels=block_in,
|
263 |
+
out_channels=block_in,
|
264 |
+
temb_channels=self.temb_ch,
|
265 |
+
dropout=dropout)
|
266 |
+
|
267 |
+
# end
|
268 |
+
self.norm_out = Normalize(block_in)
|
269 |
+
self.conv_out = torch.nn.Conv2d(block_in,
|
270 |
+
2*z_channels if double_z else z_channels,
|
271 |
+
kernel_size=3,
|
272 |
+
stride=1,
|
273 |
+
padding=1)
|
274 |
+
|
275 |
+
def forward(self, x):
|
276 |
+
# timestep embedding
|
277 |
+
temb = None
|
278 |
+
|
279 |
+
# downsampling
|
280 |
+
hs = [self.conv_in(x)]
|
281 |
+
for i_level in range(self.num_resolutions):
|
282 |
+
for i_block in range(self.num_res_blocks):
|
283 |
+
h = self.down[i_level].block[i_block](hs[-1], temb)
|
284 |
+
if len(self.down[i_level].attn) > 0:
|
285 |
+
h = self.down[i_level].attn[i_block](h)
|
286 |
+
hs.append(h)
|
287 |
+
if i_level != self.num_resolutions-1:
|
288 |
+
hs.append(self.down[i_level].downsample(hs[-1]))
|
289 |
+
|
290 |
+
# middle
|
291 |
+
h = hs[-1]
|
292 |
+
h = self.mid.block_1(h, temb)
|
293 |
+
h = self.mid.attn_1(h)
|
294 |
+
h = self.mid.block_2(h, temb)
|
295 |
+
|
296 |
+
# end
|
297 |
+
h = self.norm_out(h)
|
298 |
+
h = nonlinearity(h)
|
299 |
+
h = self.conv_out(h)
|
300 |
+
return h
|
301 |
+
|
302 |
+
|
303 |
+
class Decoder(nn.Module):
|
304 |
+
def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks,
|
305 |
+
attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels,
|
306 |
+
resolution, z_channels, give_pre_end=False, tanh_out=False, use_linear_attn=False,
|
307 |
+
attn_type="vanilla", **ignorekwargs):
|
308 |
+
super().__init__()
|
309 |
+
if use_linear_attn: attn_type = "linear"
|
310 |
+
self.ch = ch
|
311 |
+
self.temb_ch = 0
|
312 |
+
self.num_resolutions = len(ch_mult)
|
313 |
+
self.num_res_blocks = num_res_blocks
|
314 |
+
self.resolution = resolution
|
315 |
+
self.in_channels = in_channels
|
316 |
+
self.give_pre_end = give_pre_end
|
317 |
+
self.tanh_out = tanh_out
|
318 |
+
|
319 |
+
# compute in_ch_mult, block_in and curr_res at lowest res
|
320 |
+
in_ch_mult = (1,)+tuple(ch_mult)
|
321 |
+
block_in = ch*ch_mult[self.num_resolutions-1]
|
322 |
+
curr_res = resolution // 2**(self.num_resolutions-1)
|
323 |
+
self.z_shape = (1,z_channels,curr_res,curr_res)
|
324 |
+
print("Working with z of shape {} = {} dimensions.".format(
|
325 |
+
self.z_shape, np.prod(self.z_shape)))
|
326 |
+
|
327 |
+
# z to block_in
|
328 |
+
self.conv_in = torch.nn.Conv2d(z_channels,
|
329 |
+
block_in,
|
330 |
+
kernel_size=3,
|
331 |
+
stride=1,
|
332 |
+
padding=1)
|
333 |
+
|
334 |
+
# middle
|
335 |
+
self.mid = nn.Module()
|
336 |
+
self.mid.block_1 = ResnetBlock(in_channels=block_in,
|
337 |
+
out_channels=block_in,
|
338 |
+
temb_channels=self.temb_ch,
|
339 |
+
dropout=dropout)
|
340 |
+
self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
|
341 |
+
self.mid.block_2 = ResnetBlock(in_channels=block_in,
|
342 |
+
out_channels=block_in,
|
343 |
+
temb_channels=self.temb_ch,
|
344 |
+
dropout=dropout)
|
345 |
+
|
346 |
+
# upsampling
|
347 |
+
self.up = nn.ModuleList()
|
348 |
+
for i_level in reversed(range(self.num_resolutions)):
|
349 |
+
block = nn.ModuleList()
|
350 |
+
attn = nn.ModuleList()
|
351 |
+
block_out = ch*ch_mult[i_level]
|
352 |
+
for i_block in range(self.num_res_blocks+1):
|
353 |
+
block.append(ResnetBlock(in_channels=block_in,
|
354 |
+
out_channels=block_out,
|
355 |
+
temb_channels=self.temb_ch,
|
356 |
+
dropout=dropout))
|
357 |
+
block_in = block_out
|
358 |
+
if curr_res in attn_resolutions:
|
359 |
+
attn.append(make_attn(block_in, attn_type=attn_type))
|
360 |
+
up = nn.Module()
|
361 |
+
up.block = block
|
362 |
+
up.attn = attn
|
363 |
+
if i_level != 0:
|
364 |
+
up.upsample = Upsample(block_in, resamp_with_conv)
|
365 |
+
curr_res = curr_res * 2
|
366 |
+
self.up.insert(0, up) # prepend to get consistent order
|
367 |
+
|
368 |
+
# end
|
369 |
+
self.norm_out = Normalize(block_in)
|
370 |
+
self.conv_out = torch.nn.Conv2d(block_in,
|
371 |
+
out_ch,
|
372 |
+
kernel_size=3,
|
373 |
+
stride=1,
|
374 |
+
padding=1)
|
375 |
+
|
376 |
+
def forward(self, z):
|
377 |
+
#assert z.shape[1:] == self.z_shape[1:]
|
378 |
+
self.last_z_shape = z.shape
|
379 |
+
|
380 |
+
# timestep embedding
|
381 |
+
temb = None
|
382 |
+
|
383 |
+
# z to block_in
|
384 |
+
h = self.conv_in(z)
|
385 |
+
|
386 |
+
# middle
|
387 |
+
h = self.mid.block_1(h, temb)
|
388 |
+
h = self.mid.attn_1(h)
|
389 |
+
h = self.mid.block_2(h, temb)
|
390 |
+
|
391 |
+
# upsampling
|
392 |
+
for i_level in reversed(range(self.num_resolutions)):
|
393 |
+
for i_block in range(self.num_res_blocks+1):
|
394 |
+
h = self.up[i_level].block[i_block](h, temb)
|
395 |
+
if len(self.up[i_level].attn) > 0:
|
396 |
+
h = self.up[i_level].attn[i_block](h)
|
397 |
+
if i_level != 0:
|
398 |
+
h = self.up[i_level].upsample(h)
|
399 |
+
|
400 |
+
# end
|
401 |
+
if self.give_pre_end:
|
402 |
+
return h
|
403 |
+
|
404 |
+
h = self.norm_out(h)
|
405 |
+
h = nonlinearity(h)
|
406 |
+
h = self.conv_out(h)
|
407 |
+
if self.tanh_out:
|
408 |
+
h = torch.tanh(h)
|
409 |
+
return h
|
410 |
+
|
411 |
+
|
412 |
+
class FrozenAutoencoderKL(nn.Module):
|
413 |
+
def __init__(self, ddconfig, embed_dim, pretrained_path, scale_factor=0.18215):
|
414 |
+
super().__init__()
|
415 |
+
print(f'Create autoencoder with scale_factor={scale_factor}')
|
416 |
+
self.encoder = Encoder(**ddconfig)
|
417 |
+
self.decoder = Decoder(**ddconfig)
|
418 |
+
assert ddconfig["double_z"]
|
419 |
+
self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1)
|
420 |
+
self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
|
421 |
+
self.embed_dim = embed_dim
|
422 |
+
self.scale_factor = scale_factor
|
423 |
+
m, u = self.load_state_dict(torch.load(pretrained_path, map_location='cpu'))
|
424 |
+
assert len(m) == 0 and len(u) == 0
|
425 |
+
self.eval()
|
426 |
+
self.requires_grad_(False)
|
427 |
+
|
428 |
+
def encode_moments(self, x):
|
429 |
+
h = self.encoder(x)
|
430 |
+
moments = self.quant_conv(h)
|
431 |
+
return moments
|
432 |
+
|
433 |
+
def sample(self, moments):
|
434 |
+
mean, logvar = torch.chunk(moments, 2, dim=1)
|
435 |
+
logvar = torch.clamp(logvar, -30.0, 20.0)
|
436 |
+
std = torch.exp(0.5 * logvar)
|
437 |
+
z = mean + std * torch.randn_like(mean)
|
438 |
+
z = self.scale_factor * z
|
439 |
+
return z
|
440 |
+
|
441 |
+
def encode(self, x):
|
442 |
+
moments = self.encode_moments(x)
|
443 |
+
z = self.sample(moments)
|
444 |
+
return z
|
445 |
+
|
446 |
+
def decode(self, z):
|
447 |
+
z = (1. / self.scale_factor) * z
|
448 |
+
z = self.post_quant_conv(z)
|
449 |
+
dec = self.decoder(z)
|
450 |
+
return dec
|
451 |
+
|
452 |
+
def forward(self, inputs, fn):
|
453 |
+
if fn == 'encode_moments':
|
454 |
+
return self.encode_moments(inputs)
|
455 |
+
elif fn == 'encode':
|
456 |
+
return self.encode(inputs)
|
457 |
+
elif fn == 'decode':
|
458 |
+
return self.decode(inputs)
|
459 |
+
else:
|
460 |
+
raise NotImplementedError
|
461 |
+
|
462 |
+
|
463 |
+
def get_model(pretrained_path, scale_factor=0.18215):
|
464 |
+
ddconfig = dict(
|
465 |
+
double_z=True,
|
466 |
+
z_channels=4,
|
467 |
+
resolution=256,
|
468 |
+
in_channels=3,
|
469 |
+
out_ch=3,
|
470 |
+
ch=128,
|
471 |
+
ch_mult=[1, 2, 4, 4],
|
472 |
+
num_res_blocks=2,
|
473 |
+
attn_resolutions=[],
|
474 |
+
dropout=0.0
|
475 |
+
)
|
476 |
+
return FrozenAutoencoderKL(ddconfig, 4, pretrained_path, scale_factor)
|
477 |
+
|
478 |
+
|
479 |
+
def main():
|
480 |
+
import torchvision.transforms as transforms
|
481 |
+
from torchvision.utils import save_image
|
482 |
+
import os
|
483 |
+
from PIL import Image
|
484 |
+
|
485 |
+
model = get_model('assets/stable-diffusion/autoencoder_kl.pth')
|
486 |
+
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
487 |
+
model = model.to(device)
|
488 |
+
|
489 |
+
scale_factor = 0.18215
|
490 |
+
T = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(256), transforms.ToTensor()])
|
491 |
+
path = 'imgs'
|
492 |
+
fnames = os.listdir(path)
|
493 |
+
for fname in fnames:
|
494 |
+
p = os.path.join(path, fname)
|
495 |
+
img = Image.open(p)
|
496 |
+
img = T(img)
|
497 |
+
img = img * 2. - 1
|
498 |
+
img = img[None, ...]
|
499 |
+
img = img.to(device)
|
500 |
+
|
501 |
+
# with torch.cuda.amp.autocast():
|
502 |
+
# moments = model.encode_moments(img)
|
503 |
+
# mean, logvar = torch.chunk(moments, 2, dim=1)
|
504 |
+
# logvar = torch.clamp(logvar, -30.0, 20.0)
|
505 |
+
# std = torch.exp(0.5 * logvar)
|
506 |
+
# zs = [(mean + std * torch.randn_like(mean)) * scale_factor for _ in range(4)]
|
507 |
+
# recons = [model.decode(z) for z in zs]
|
508 |
+
|
509 |
+
with torch.cuda.amp.autocast():
|
510 |
+
print('test encode & decode')
|
511 |
+
recons = [model.decode(model.encode(img)) for _ in range(4)]
|
512 |
+
|
513 |
+
out = torch.cat([img, *recons], dim=0)
|
514 |
+
out = (out + 1) * 0.5
|
515 |
+
save_image(out, f'recons_{fname}')
|
516 |
+
|
517 |
+
|
518 |
+
if __name__ == "__main__":
|
519 |
+
main()
|
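As a quick shape check for `get_model()` above: with `ch_mult=[1, 2, 4, 4]` the encoder downsamples three times, so a 256x256 RGB image yields an 8-channel 32x32 moments tensor and a 4-channel 32x32 latent. A minimal sketch, assuming the pretrained checkpoint sits at the path the repository's scripts expect:

import torch
from libs.autoencoder import get_model

model = get_model('assets/stable-diffusion/autoencoder_kl.pth')
x = torch.randn(1, 3, 256, 256) * 2 - 1    # image scaled to [-1, 1]
moments = model(x, fn='encode_moments')    # torch.Size([1, 8, 32, 32])
z = model.sample(moments)                  # torch.Size([1, 4, 32, 32])
recon = model(z, fn='decode')              # torch.Size([1, 3, 256, 256])
print(moments.shape, z.shape, recon.shape)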
src/COP-GEN-Beta/libs/timm.py
ADDED
@@ -0,0 +1,112 @@
|
1 |
+
# code from timm 0.3.2
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
import math
|
5 |
+
import warnings
|
6 |
+
|
7 |
+
|
8 |
+
def _no_grad_trunc_normal_(tensor, mean, std, a, b):
|
9 |
+
# Cut & paste from PyTorch official master until it's in a few official releases - RW
|
10 |
+
# Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
|
11 |
+
def norm_cdf(x):
|
12 |
+
# Computes standard normal cumulative distribution function
|
13 |
+
return (1. + math.erf(x / math.sqrt(2.))) / 2.
|
14 |
+
|
15 |
+
if (mean < a - 2 * std) or (mean > b + 2 * std):
|
16 |
+
warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
|
17 |
+
"The distribution of values may be incorrect.",
|
18 |
+
stacklevel=2)
|
19 |
+
|
20 |
+
with torch.no_grad():
|
21 |
+
# Values are generated by using a truncated uniform distribution and
|
22 |
+
# then using the inverse CDF for the normal distribution.
|
23 |
+
# Get upper and lower cdf values
|
24 |
+
l = norm_cdf((a - mean) / std)
|
25 |
+
u = norm_cdf((b - mean) / std)
|
26 |
+
|
27 |
+
# Uniformly fill tensor with values from [l, u], then translate to
|
28 |
+
# [2l-1, 2u-1].
|
29 |
+
tensor.uniform_(2 * l - 1, 2 * u - 1)
|
30 |
+
|
31 |
+
# Use inverse cdf transform for normal distribution to get truncated
|
32 |
+
# standard normal
|
33 |
+
tensor.erfinv_()
|
34 |
+
|
35 |
+
# Transform to proper mean, std
|
36 |
+
tensor.mul_(std * math.sqrt(2.))
|
37 |
+
tensor.add_(mean)
|
38 |
+
|
39 |
+
# Clamp to ensure it's in the proper range
|
40 |
+
tensor.clamp_(min=a, max=b)
|
41 |
+
return tensor
|
42 |
+
|
43 |
+
|
44 |
+
def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
|
45 |
+
# type: (Tensor, float, float, float, float) -> Tensor
|
46 |
+
r"""Fills the input Tensor with values drawn from a truncated
|
47 |
+
normal distribution. The values are effectively drawn from the
|
48 |
+
normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
|
49 |
+
with values outside :math:`[a, b]` redrawn until they are within
|
50 |
+
the bounds. The method used for generating the random values works
|
51 |
+
best when :math:`a \leq \text{mean} \leq b`.
|
52 |
+
Args:
|
53 |
+
tensor: an n-dimensional `torch.Tensor`
|
54 |
+
mean: the mean of the normal distribution
|
55 |
+
std: the standard deviation of the normal distribution
|
56 |
+
a: the minimum cutoff value
|
57 |
+
b: the maximum cutoff value
|
58 |
+
Examples:
|
59 |
+
>>> w = torch.empty(3, 5)
|
60 |
+
>>> nn.init.trunc_normal_(w)
|
61 |
+
"""
|
62 |
+
return _no_grad_trunc_normal_(tensor, mean, std, a, b)
|
63 |
+
|
64 |
+
|
65 |
+
def drop_path(x, drop_prob: float = 0., training: bool = False):
|
66 |
+
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
|
67 |
+
|
68 |
+
This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
|
69 |
+
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
|
70 |
+
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
|
71 |
+
changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
|
72 |
+
'survival rate' as the argument.
|
73 |
+
|
74 |
+
"""
|
75 |
+
if drop_prob == 0. or not training:
|
76 |
+
return x
|
77 |
+
keep_prob = 1 - drop_prob
|
78 |
+
shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
|
79 |
+
random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
|
80 |
+
random_tensor.floor_() # binarize
|
81 |
+
output = x.div(keep_prob) * random_tensor
|
82 |
+
return output
|
83 |
+
|
84 |
+
|
85 |
+
class DropPath(nn.Module):
|
86 |
+
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
|
87 |
+
"""
|
88 |
+
def __init__(self, drop_prob=None):
|
89 |
+
super(DropPath, self).__init__()
|
90 |
+
self.drop_prob = drop_prob
|
91 |
+
|
92 |
+
def forward(self, x):
|
93 |
+
return drop_path(x, self.drop_prob, self.training)
|
94 |
+
|
95 |
+
|
96 |
+
class Mlp(nn.Module):
|
97 |
+
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
|
98 |
+
super().__init__()
|
99 |
+
out_features = out_features or in_features
|
100 |
+
hidden_features = hidden_features or in_features
|
101 |
+
self.fc1 = nn.Linear(in_features, hidden_features)
|
102 |
+
self.act = act_layer()
|
103 |
+
self.fc2 = nn.Linear(hidden_features, out_features)
|
104 |
+
self.drop = nn.Dropout(drop)
|
105 |
+
|
106 |
+
def forward(self, x):
|
107 |
+
x = self.fc1(x)
|
108 |
+
x = self.act(x)
|
109 |
+
x = self.drop(x)
|
110 |
+
x = self.fc2(x)
|
111 |
+
x = self.drop(x)
|
112 |
+
return x
|
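A minimal sketch of how `Mlp`, `DropPath` and `trunc_normal_` above are typically combined into a residual block with stochastic depth, similar to the `Block` class in libs/triffuser_multi_post_ln.py; the dimensions are illustrative.

import torch
import torch.nn as nn
from libs.timm import DropPath, Mlp, trunc_normal_

class ResidualMlp(nn.Module):
    def __init__(self, dim=768, drop_path=0.1):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.mlp = Mlp(in_features=dim, hidden_features=4 * dim)
        self.drop_path = DropPath(drop_path)         # randomly skips the branch per sample
        trunc_normal_(self.mlp.fc1.weight, std=.02)  # init convention used by the backbone

    def forward(self, x):
        return x + self.drop_path(self.mlp(self.norm(x)))

tokens = torch.randn(2, 16, 768)    # (batch, tokens, dim)
print(ResidualMlp()(tokens).shape)  # torch.Size([2, 16, 768])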
src/COP-GEN-Beta/libs/triffuser_multi_post_ln.py
ADDED
@@ -0,0 +1,290 @@
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import math
|
4 |
+
from .timm import trunc_normal_, DropPath, Mlp
|
5 |
+
import einops
|
6 |
+
import torch.utils.checkpoint
|
7 |
+
import torch.nn.functional as F
|
8 |
+
|
9 |
+
if hasattr(torch.nn.functional, 'scaled_dot_product_attention'):
|
10 |
+
ATTENTION_MODE = 'flash'
|
11 |
+
else:
|
12 |
+
try:
|
13 |
+
import xformers
|
14 |
+
import xformers.ops
|
15 |
+
ATTENTION_MODE = 'xformers'
|
16 |
+
except:
|
17 |
+
ATTENTION_MODE = 'math'
|
18 |
+
print(f'attention mode is {ATTENTION_MODE}')
|
19 |
+
|
20 |
+
|
21 |
+
def timestep_embedding(timesteps, dim, max_period=10000):
|
22 |
+
"""
|
23 |
+
Create sinusoidal timestep embeddings.
|
24 |
+
|
25 |
+
:param timesteps: a 1-D Tensor of N indices, one per batch element.
|
26 |
+
These may be fractional.
|
27 |
+
:param dim: the dimension of the output.
|
28 |
+
:param max_period: controls the minimum frequency of the embeddings.
|
29 |
+
:return: an [N x dim] Tensor of positional embeddings.
|
30 |
+
"""
|
31 |
+
half = dim // 2
|
32 |
+
freqs = torch.exp(
|
33 |
+
-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half
|
34 |
+
).to(device=timesteps.device)
|
35 |
+
args = timesteps[:, None].float() * freqs[None]
|
36 |
+
embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
|
37 |
+
if dim % 2:
|
38 |
+
embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
|
39 |
+
return embedding
|
40 |
+
|
41 |
+
|
42 |
+
def patchify(imgs, patch_size):
|
43 |
+
x = einops.rearrange(imgs, 'B C (h p1) (w p2) -> B (h w) (p1 p2 C)', p1=patch_size, p2=patch_size)
|
44 |
+
return x
|
45 |
+
|
46 |
+
|
47 |
+
def unpatchify(x, in_chans):
|
48 |
+
patch_size = int((x.shape[2] // in_chans) ** 0.5)
|
49 |
+
h = w = int(x.shape[1] ** .5)
|
50 |
+
assert h * w == x.shape[1] and patch_size ** 2 * in_chans == x.shape[2]
|
51 |
+
x = einops.rearrange(x, 'B (h w) (p1 p2 C) -> B C (h p1) (w p2)', h=h, p1=patch_size, p2=patch_size)
|
52 |
+
return x
|
53 |
+
|
54 |
+
|
55 |
+
def interpolate_pos_emb(pos_emb, old_shape, new_shape):
|
56 |
+
pos_emb = einops.rearrange(pos_emb, 'B (H W) C -> B C H W', H=old_shape[0], W=old_shape[1])
|
57 |
+
pos_emb = F.interpolate(pos_emb, new_shape, mode='bilinear')
|
58 |
+
pos_emb = einops.rearrange(pos_emb, 'B C H W -> B (H W) C')
|
59 |
+
return pos_emb
|
60 |
+
|
61 |
+
|
62 |
+
class Attention(nn.Module):
|
63 |
+
def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
|
64 |
+
super().__init__()
|
65 |
+
self.num_heads = num_heads
|
66 |
+
head_dim = dim // num_heads
|
67 |
+
self.scale = qk_scale or head_dim ** -0.5
|
68 |
+
|
69 |
+
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
|
70 |
+
self.attn_drop = nn.Dropout(attn_drop)
|
71 |
+
self.proj = nn.Linear(dim, dim)
|
72 |
+
self.proj_drop = nn.Dropout(proj_drop)
|
73 |
+
|
74 |
+
def forward(self, x):
|
75 |
+
B, L, C = x.shape
|
76 |
+
|
77 |
+
qkv = self.qkv(x)
|
78 |
+
if ATTENTION_MODE == 'flash':
|
79 |
+
qkv = einops.rearrange(qkv, 'B L (K H D) -> K B H L D', K=3, H=self.num_heads).float()
|
80 |
+
q, k, v = qkv[0], qkv[1], qkv[2] # B H L D
|
81 |
+
x = torch.nn.functional.scaled_dot_product_attention(q, k, v)
|
82 |
+
x = einops.rearrange(x, 'B H L D -> B L (H D)')
|
83 |
+
elif ATTENTION_MODE == 'xformers':
|
84 |
+
qkv = einops.rearrange(qkv, 'B L (K H D) -> K B L H D', K=3, H=self.num_heads)
|
85 |
+
q, k, v = qkv[0], qkv[1], qkv[2] # B L H D
|
86 |
+
x = xformers.ops.memory_efficient_attention(q, k, v)
|
87 |
+
x = einops.rearrange(x, 'B L H D -> B L (H D)', H=self.num_heads)
|
88 |
+
elif ATTENTION_MODE == 'math':
|
89 |
+
with torch.amp.autocast(device_type='cuda', enabled=False):
|
90 |
+
qkv = einops.rearrange(qkv, 'B L (K H D) -> K B H L D', K=3, H=self.num_heads).float()
|
91 |
+
q, k, v = qkv[0], qkv[1], qkv[2] # B H L D
|
92 |
+
attn = (q @ k.transpose(-2, -1)) * self.scale
|
93 |
+
attn = attn.softmax(dim=-1)
|
94 |
+
attn = self.attn_drop(attn)
|
95 |
+
x = (attn @ v).transpose(1, 2).reshape(B, L, C)
|
96 |
+
else:
|
97 |
+
raise NotImplementedError
|
98 |
+
|
99 |
+
x = self.proj(x)
|
100 |
+
x = self.proj_drop(x)
|
101 |
+
return x
|
102 |
+
|
103 |
+
|
104 |
+
class Block(nn.Module):
|
105 |
+
|
106 |
+
def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
|
107 |
+
drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, skip=False, use_checkpoint=False):
|
108 |
+
super().__init__()
|
109 |
+
self.norm1 = norm_layer(dim) if skip else None
|
110 |
+
self.norm2 = norm_layer(dim)
|
111 |
+
|
112 |
+
self.attn = Attention(
|
113 |
+
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
|
114 |
+
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
|
115 |
+
self.norm3 = norm_layer(dim)
|
116 |
+
mlp_hidden_dim = int(dim * mlp_ratio)
|
117 |
+
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
|
118 |
+
self.skip_linear = nn.Linear(2 * dim, dim) if skip else None
|
119 |
+
self.use_checkpoint = use_checkpoint
|
120 |
+
|
121 |
+
def forward(self, x, skip=None):
|
122 |
+
if self.use_checkpoint:
|
123 |
+
return torch.utils.checkpoint.checkpoint(self._forward, x, skip)
|
124 |
+
else:
|
125 |
+
return self._forward(x, skip)
|
126 |
+
|
127 |
+
def _forward(self, x, skip=None):
|
128 |
+
if self.skip_linear is not None:
|
129 |
+
x = self.skip_linear(torch.cat([x, skip], dim=-1))
|
130 |
+
x = self.norm1(x)
|
131 |
+
x = x + self.drop_path(self.attn(x))
|
132 |
+
x = self.norm2(x)
|
133 |
+
|
134 |
+
x = x + self.drop_path(self.mlp(x))
|
135 |
+
x = self.norm3(x)
|
136 |
+
|
137 |
+
return x
|
138 |
+
|
139 |
+
|
140 |
+
class PatchEmbed(nn.Module):
|
141 |
+
""" Image to Patch Embedding
|
142 |
+
"""
|
143 |
+
def __init__(self, patch_size, in_chans=3, embed_dim=768):
|
144 |
+
super().__init__()
|
145 |
+
self.patch_size = patch_size
|
146 |
+
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
|
147 |
+
|
148 |
+
def forward(self, x):
|
149 |
+
B, C, H, W = x.shape
|
150 |
+
assert H % self.patch_size == 0 and W % self.patch_size == 0
|
151 |
+
x = self.proj(x).flatten(2).transpose(1, 2)
|
152 |
+
return x
|
153 |
+
|
154 |
+
|
155 |
+
class Triffuser(nn.Module):
|
156 |
+
def __init__(self, img_size, in_chans, patch_size, embed_dim=768, depth=12,
|
157 |
+
num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, pos_drop_rate=0., drop_rate=0., attn_drop_rate=0.,
|
158 |
+
norm_layer=nn.LayerNorm, mlp_time_embed=False, use_checkpoint=False,
|
159 |
+
num_modalities=None,
|
160 |
+
# text_dim=None,
|
161 |
+
# num_text_tokens=None,
|
162 |
+
clip_img_dim=None # All modalities with the same clip dimension
|
163 |
+
):
|
164 |
+
super().__init__()
|
165 |
+
self.in_chans = in_chans
|
166 |
+
self.patch_size = patch_size
|
167 |
+
self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
|
168 |
+
self.num_modalities = num_modalities
|
169 |
+
if num_modalities is None:
|
170 |
+
raise ValueError("num_modalities must be provided")
|
171 |
+
|
172 |
+
self.patch_embeds = nn.ModuleList([PatchEmbed(patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) for _ in range(num_modalities)])
|
173 |
+
self.img_size = (img_size, img_size) if isinstance(img_size, int) else img_size # the default img size
|
174 |
+
assert self.img_size[0] % patch_size == 0 and self.img_size[1] % patch_size == 0
|
175 |
+
self.num_patches = (self.img_size[0] // patch_size) * (self.img_size[1] // patch_size)
|
176 |
+
|
177 |
+
self.time_img_embeds = nn.ModuleList([nn.Sequential(
|
178 |
+
nn.Linear(embed_dim, 4 * embed_dim),
|
179 |
+
nn.SiLU(),
|
180 |
+
nn.Linear(4 * embed_dim, embed_dim),
|
181 |
+
) if mlp_time_embed else nn.Identity() for _ in range(num_modalities)])
|
182 |
+
|
183 |
+
# self.text_embed = nn.Linear(text_dim, embed_dim)
|
184 |
+
# self.text_out = nn.Linear(embed_dim, text_dim)
|
185 |
+
|
186 |
+
# TODO: We skip clip embedding for now
|
187 |
+
# self.clip_img_embed = nn.Linear(clip_img_dim, embed_dim)
|
188 |
+
# self.clip_img_out = nn.Linear(embed_dim, clip_img_dim)
|
189 |
+
|
190 |
+
# self.num_text_tokens = num_text_tokens
|
191 |
+
# TODO: ATM we assume the same num_patches for all modalities
|
192 |
+
# 1 for time embedding token of each modality
|
193 |
+
# num_patches for each modality (assuming the same number of patches for all modalities)
|
194 |
+
self.num_tokens = 1 * self.num_modalities + self.num_patches * self.num_modalities
|
195 |
+
|
196 |
+
self.pos_embed = nn.Parameter(torch.zeros(1, self.num_tokens, embed_dim))
|
197 |
+
self.pos_drop = nn.Dropout(p=pos_drop_rate)
|
198 |
+
|
199 |
+
self.in_blocks = nn.ModuleList([
|
200 |
+
Block(
|
201 |
+
dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
|
202 |
+
drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer, use_checkpoint=use_checkpoint)
|
203 |
+
for _ in range(depth // 2)])
|
204 |
+
|
205 |
+
self.mid_block = Block(
|
206 |
+
dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
|
207 |
+
drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer, use_checkpoint=use_checkpoint)
|
208 |
+
|
209 |
+
self.out_blocks = nn.ModuleList([
|
210 |
+
Block(
|
211 |
+
dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
|
212 |
+
drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer, skip=True, use_checkpoint=use_checkpoint)
|
213 |
+
for _ in range(depth // 2)])
|
214 |
+
|
215 |
+
self.norm = norm_layer(embed_dim)
|
216 |
+
self.patch_dim = patch_size ** 2 * in_chans
|
217 |
+
self.decoder_preds = nn.ModuleList([nn.Linear(embed_dim, self.patch_dim, bias=True) for _ in range(num_modalities)])
|
218 |
+
|
219 |
+
trunc_normal_(self.pos_embed, std=.02)
|
220 |
+
self.apply(self._init_weights)
|
221 |
+
|
222 |
+
def _init_weights(self, m):
|
223 |
+
if isinstance(m, nn.Linear):
|
224 |
+
trunc_normal_(m.weight, std=.02)
|
225 |
+
if isinstance(m, nn.Linear) and m.bias is not None:
|
226 |
+
nn.init.constant_(m.bias, 0)
|
227 |
+
elif isinstance(m, nn.LayerNorm):
|
228 |
+
nn.init.constant_(m.bias, 0)
|
229 |
+
nn.init.constant_(m.weight, 1.0)
|
230 |
+
|
231 |
+
@torch.jit.ignore
|
232 |
+
def no_weight_decay(self):
|
233 |
+
return {'pos_embed'}
|
234 |
+
|
235 |
+
def forward(self, imgs, t_imgs):
|
236 |
+
|
237 |
+
assert len(imgs) == len(t_imgs) == self.num_modalities
|
238 |
+
|
239 |
+
# TODO: We are still assuming all images have the same shape
|
240 |
+
_, _, H, W = imgs[0].shape
|
241 |
+
|
242 |
+
imgs = [self.patch_embeds[i](img) for i, img in enumerate(imgs)]
|
243 |
+
|
244 |
+
t_imgs_token = [self.time_img_embeds[i](timestep_embedding(t_img, self.embed_dim)) for i, t_img in enumerate(t_imgs)]
|
245 |
+
t_imgs_token = [t_img_token.unsqueeze(dim=1) for t_img_token in t_imgs_token]
|
246 |
+
|
247 |
+
# text = self.text_embed(text)
|
248 |
+
# clip_img = self.clip_img_embed(clip_img)
|
249 |
+
x = torch.cat((*t_imgs_token, *imgs), dim=1)
|
250 |
+
|
251 |
+
num_img_tokens = [img.size(1) for img in imgs] # Each image might have different number of tokens
|
252 |
+
num_t_tokens = [1] * self.num_modalities # There is only one time token for each modality
|
253 |
+
|
254 |
+
# TODO: ATM assume all modality images have the same shape
|
255 |
+
if H == self.img_size[0] and W == self.img_size[1]:
|
256 |
+
pos_embed = self.pos_embed
|
257 |
+
else: # interpolate the positional embedding when the input image is not of the default shape
|
258 |
+
raise NotImplementedError("Why are we here? Images are not of the default shape. Interpolate positional embedding.")
|
259 |
+
pos_embed_others, pos_embed_patches = torch.split(self.pos_embed, [1 + 1 + num_text_tokens + 1, self.num_patches], dim=1)
|
260 |
+
pos_embed_patches = interpolate_pos_emb(pos_embed_patches, (self.img_size[0] // self.patch_size, self.img_size[1] // self.patch_size),
|
261 |
+
(H // self.patch_size, W // self.patch_size))
|
262 |
+
pos_embed = torch.cat((pos_embed_others, pos_embed_patches), dim=1)
|
263 |
+
|
264 |
+
x = x + pos_embed
|
265 |
+
x = self.pos_drop(x)
|
266 |
+
|
267 |
+
skips = []
|
268 |
+
for blk in self.in_blocks:
|
269 |
+
x = blk(x)
|
270 |
+
skips.append(x)
|
271 |
+
|
272 |
+
x = self.mid_block(x)
|
273 |
+
|
274 |
+
for blk in self.out_blocks:
|
275 |
+
x = blk(x, skips.pop())
|
276 |
+
|
277 |
+
x = self.norm(x)
|
278 |
+
|
279 |
+
all_t_imgs = x.split((*num_t_tokens, *num_img_tokens), dim=1)
|
280 |
+
|
281 |
+
t_imgs_token_out = all_t_imgs[:self.num_modalities]
|
282 |
+
imgs_out = all_t_imgs[self.num_modalities:]
|
283 |
+
|
284 |
+
imgs_out = [self.decoder_preds[i](img_out) for i, img_out in enumerate(imgs_out)]
|
285 |
+
imgs_out = [unpatchify(img_out, self.in_chans) for img_out in imgs_out]
|
286 |
+
|
287 |
+
# clip_img_out = self.clip_img_out(clip_img_out)
|
288 |
+
# text_out = self.text_out(text_out)
|
289 |
+
|
290 |
+
return imgs_out
|
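A minimal sketch of instantiating the `Triffuser` backbone above for three modalities of 4-channel 32x32 latents and running one forward pass; the hyper-parameters are illustrative, not the training configuration from the configs/ directory.

import torch
from libs.triffuser_multi_post_ln import Triffuser

net = Triffuser(img_size=32, in_chans=4, patch_size=2, embed_dim=512,
                depth=12, num_heads=8, mlp_time_embed=True, num_modalities=3)

imgs = [torch.randn(2, 4, 32, 32) for _ in range(3)]       # one latent per modality
t_imgs = [torch.randint(0, 1000, (2,)) for _ in range(3)]   # per-modality timesteps
outs = net(imgs, t_imgs)
print([o.shape for o in outs])                              # three (2, 4, 32, 32) tensors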
src/COP-GEN-Beta/majortom/NMajorTOM.py
ADDED
@@ -0,0 +1,170 @@
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import torch
|
4 |
+
from torch.utils.data import Dataset
|
5 |
+
from pathlib import Path
|
6 |
+
import rasterio as rio
|
7 |
+
from PIL import Image
|
8 |
+
import torchvision.transforms as transforms
|
9 |
+
import random
|
10 |
+
|
11 |
+
class NMajorTOM(Dataset):
|
12 |
+
"""NMajorTOM Dataset with multiple modalities (https://huggingface.co/Major-TOM)
|
13 |
+
|
14 |
+
Args:
|
15 |
+
modalities (dict): Dictionary of modality configurations, where each key is a modality name
|
16 |
+
and value is a dict containing:
|
17 |
+
- df: Metadata dataframe for that modality
|
18 |
+
- local_dir: Root directory for that modality
|
19 |
+
- tif_bands: List of tif bands to read
|
20 |
+
- png_bands: List of png bands to read
|
21 |
+
- tif_transforms: List of transforms for tif files
|
22 |
+
- png_transforms: List of transforms for png files
|
23 |
+
random_flip (bool): Whether to randomly flip all modalities together
|
24 |
+
ratio_train_test (float): Ratio of training samples (e.g., 0.8 for 80% train, 20% test)
|
25 |
+
seed (int): Random seed for reproducible train/test splits
|
26 |
+
"""
|
27 |
+
|
28 |
+
def __init__(self, modalities, random_flip=True, ratio_train_test=0.8, seed=42):
|
29 |
+
super().__init__()
|
30 |
+
self.modalities = {}
|
31 |
+
|
32 |
+
# Set random seed for reproducibility
|
33 |
+
random.seed(seed)
|
34 |
+
|
35 |
+
# Process each modality's configuration
|
36 |
+
for modality_name, config in modalities.items():
|
37 |
+
# Drop rows that are complete duplicates across all relevant columns
|
38 |
+
num_rows = len(config['df'])
|
39 |
+
if modality_name == 'S1RTC':
|
40 |
+
relevant_cols = ['grid_cell', 'grid_row_u', 'grid_col_r', 'product_id',
|
41 |
+
'timestamp', 'nodata', 'orbit_state', 'centre_lat',
|
42 |
+
'centre_lon', 'crs', 'parquet_url', 'geometry']
|
43 |
+
else:
|
44 |
+
relevant_cols = list(config['df'].keys())
|
45 |
+
config['df'] = config['df'].drop_duplicates(subset=relevant_cols)
|
46 |
+
print(f"Dropped {num_rows - len(config['df'])} duplicates from {modality_name}")
|
47 |
+
|
48 |
+
# By now, we should have no duplicate grid_cells
|
49 |
+
if config['df']['grid_cell'].duplicated().any():
|
50 |
+
raise ValueError(f"Found rows with duplicate grid_cells but different values in modality {modality_name}")
|
51 |
+
|
52 |
+
self.modalities[modality_name] = {
|
53 |
+
'df': config['df'],
|
54 |
+
'local_dir': Path(config['local_dir']) if isinstance(config['local_dir'], str) else config['local_dir'],
|
55 |
+
'tif_bands': config['tif_bands'] if not isinstance(config['tif_bands'], str) else [config['tif_bands']],
|
56 |
+
'png_bands': config['png_bands'] if not isinstance(config['png_bands'], str) else [config['png_bands']],
|
57 |
+
'tif_transforms': transforms.Compose(config['tif_transforms']) if config['tif_transforms'] is not None else None,
|
58 |
+
'png_transforms': transforms.Compose(config['png_transforms']) if config['png_transforms'] is not None else None
|
59 |
+
}
|
60 |
+
|
61 |
+
self.random_flip = random_flip
|
62 |
+
|
63 |
+
# Get the set of grid_cells for each modality
|
64 |
+
grid_cells_by_modality = {
|
65 |
+
name: set(mod['df']['grid_cell'].values)
|
66 |
+
for name, mod in self.modalities.items()
|
67 |
+
}
|
68 |
+
|
69 |
+
# Check that all modalities share the same grid_cells
|
70 |
+
if len(grid_cells_by_modality) > 0:
|
71 |
+
reference_grid_cells = grid_cells_by_modality[list(grid_cells_by_modality.keys())[0]]
|
72 |
+
for modality_name, grid_cells in grid_cells_by_modality.items():
|
73 |
+
if grid_cells != reference_grid_cells:
|
74 |
+
missing = reference_grid_cells - grid_cells
|
75 |
+
extra = grid_cells - reference_grid_cells
|
76 |
+
error_msg = f"Modality {modality_name} has mismatched grid_cells.\n"
|
77 |
+
if missing:
|
78 |
+
error_msg += f"Missing grid_cells: {missing}\n"
|
79 |
+
if extra:
|
80 |
+
error_msg += f"Extra grid_cells: {extra}"
|
81 |
+
raise ValueError(error_msg)
|
82 |
+
|
83 |
+
# Sort all dataframes by grid_cell for consistent sampling
|
84 |
+
for modality in self.modalities.values():
|
85 |
+
modality['df'] = modality['df'].sort_values('grid_cell').reset_index(drop=True)
|
86 |
+
|
87 |
+
|
88 |
+
print("Creating train/test split...")
|
89 |
+
|
90 |
+
# After sorting dataframes, create train/test split
|
91 |
+
all_grid_cells = list(reference_grid_cells)
|
92 |
+
random.shuffle(all_grid_cells)
|
93 |
+
|
94 |
+
n_train = int(len(all_grid_cells) * ratio_train_test)
|
95 |
+
self.train_grid_cells = set(all_grid_cells[:n_train])
|
96 |
+
self.test_grid_cells = set(all_grid_cells[n_train:])
|
97 |
+
|
98 |
+
# Let's create a dictionary of grid_cells to split
|
99 |
+
self.grid_cell_to_split = {grid_cell: 'train' if grid_cell in self.train_grid_cells else 'test' for grid_cell in reference_grid_cells}
|
100 |
+
|
101 |
+
print(f"Split dataset into {len(self.train_grid_cells)} train and {len(self.test_grid_cells)} test grid cells")
|
102 |
+
|
103 |
+
def __len__(self):
|
104 |
+
# Return length of any modality (they should all be the same)
|
105 |
+
assert len(self.modalities) > 0, "No modalities provided"
|
106 |
+
# Get len for each modality and make sure they are the same
|
107 |
+
lengths = [len(mod['df']) for mod in self.modalities.values()]
|
108 |
+
if not all(x == lengths[0] for x in lengths):
|
109 |
+
raise ValueError("All modalities must have the same number of samples")
|
110 |
+
return lengths[0]
|
111 |
+
|
112 |
+
def __getitem__(self, idx):
|
113 |
+
result = {}
|
114 |
+
|
115 |
+
# Generate the same random flip decision for all modalities
|
116 |
+
do_flip = self.random_flip and random.random() < 0.5
|
117 |
+
|
118 |
+
# Get the grid cell for this index (they're all the same across modalities)
|
119 |
+
first_modality = list(self.modalities.keys())[0]
|
120 |
+
current_grid_cell = self.modalities[first_modality]['df'].iloc[idx]['grid_cell']
|
121 |
+
|
122 |
+
# Determine if this sample is in train or test set
|
123 |
+
split = self.grid_cell_to_split[current_grid_cell]
|
124 |
+
|
125 |
+
for modality_name, modality in self.modalities.items():
|
126 |
+
meta = modality['df'].iloc[idx]
|
127 |
+
product_id = meta.product_id if 'product_id' in meta.index else "id"
|
128 |
+
grid_cell = meta.grid_cell
|
129 |
+
row = grid_cell.split('_')[0]
|
130 |
+
|
131 |
+
path = modality['local_dir'] / Path(f"{row}/{grid_cell}/{product_id}")
|
132 |
+
out_dict = {}
|
133 |
+
|
134 |
+
# Process TIF bands
|
135 |
+
for band in modality['tif_bands']:
|
136 |
+
with rio.open(path / f'{band}.tif') as f:
|
137 |
+
out = f.read() # out = torch.from_numpy(f.read()).float()
|
138 |
+
if modality['tif_transforms'] is not None:
|
139 |
+
out = modality['tif_transforms'](out)
|
140 |
+
out_dict[band] = out
|
141 |
+
|
142 |
+
# Process PNG bands
|
143 |
+
for band in modality['png_bands']:
|
144 |
+
out = Image.open(path / f'{band}.png')
|
145 |
+
if modality['png_transforms'] is not None:
|
146 |
+
out = modality['png_transforms'](out)
|
147 |
+
out_dict[band] = out
|
148 |
+
|
149 |
+
# Apply the same random flip to all bands in this modality
|
150 |
+
if do_flip:
|
151 |
+
out_dict = {k: v.flip(-1) for k, v in out_dict.items()}
|
152 |
+
|
153 |
+
# Add split information to the output dictionary
|
154 |
+
out_dict['split'] = split
|
155 |
+
out_dict['grid_cell'] = current_grid_cell
|
156 |
+
|
157 |
+
result[modality_name] = out_dict
|
158 |
+
|
159 |
+
# Assert the grid_cells are the same for all modalities in the resulting dictionary
|
160 |
+
if len(result) > 0:
|
161 |
+
first_modality = list(result.keys())[0]
|
162 |
+
first_grid_cell = self.modalities[first_modality]['df'].iloc[idx]['grid_cell']
|
163 |
+
for modality_name in result.keys():
|
164 |
+
current_grid_cell = self.modalities[modality_name]['df'].iloc[idx]['grid_cell']
|
165 |
+
if current_grid_cell != first_grid_cell:
|
166 |
+
raise ValueError(f"Mismatched grid_cells found: {current_grid_cell} != {first_grid_cell}")
|
167 |
+
# Add grid_cell to the output dictionary for verification
|
168 |
+
result[modality_name]['grid_cell'] = current_grid_cell
|
169 |
+
|
170 |
+
return result
|
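A hedged usage sketch of the class above, assuming two thumbnail-based modalities downloaded into the {local_dir}/{row}/{grid_cell}/{product_id} layout that __getitem__ expects (all paths, modality keys and the import path below are illustrative):

# Illustrative configuration; paths and names are assumptions, not part of the repository.
import pandas as pd
import torchvision.transforms as transforms
from majortom.NMajorTOM import NMajorTOM

s2_df = pd.read_parquet('data/majorTOM/rome/Core-S2L2A/metadata.parquet')
dem_df = pd.read_parquet('data/majorTOM/rome/Core-DEM/metadata.parquet')

modalities = {
    'S2L2A': dict(df=s2_df, local_dir='data/majorTOM/rome/Core-S2L2A',
                  tif_bands=[], png_bands=['thumbnail'],
                  tif_transforms=None, png_transforms=[transforms.ToTensor()]),
    'DEM': dict(df=dem_df, local_dir='data/majorTOM/rome/Core-DEM',
                tif_bands=[], png_bands=['thumbnail'],
                tif_transforms=None, png_transforms=[transforms.ToTensor()]),
}
dataset = NMajorTOM(modalities, random_flip=True, ratio_train_test=0.8, seed=42)
sample = dataset[0]   # {'S2L2A': {...}, 'DEM': {...}}, each with 'split' and 'grid_cell'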
src/COP-GEN-Beta/majortom/coverage_vis.py
ADDED
@@ -0,0 +1,149 @@
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from mpl_toolkits.basemap import Basemap
|
5 |
+
import PIL
|
6 |
+
|
7 |
+
def get_mask(df):
|
8 |
+
"""
|
9 |
+
Take a Major TOM dataframe and create a mask corresponding to available cells
|
10 |
+
"""
|
11 |
+
|
12 |
+
mask = np.zeros((2004,4008), dtype=np.uint8)
|
13 |
+
row_offset = -1002
|
14 |
+
col_offset = -2004
|
15 |
+
|
16 |
+
nodata = df['nodata'].values > 0.5
|
17 |
+
|
18 |
+
yy = mask.shape[0] - (np.array(df['grid_row_u']) - row_offset) - 1
|
19 |
+
xx = np.array(df['grid_col_r']) - col_offset
|
20 |
+
|
21 |
+
yy = yy[~nodata]
|
22 |
+
xx = xx[~nodata]
|
23 |
+
|
24 |
+
mask[yy, xx] = 255
|
25 |
+
|
26 |
+
return PIL.Image.fromarray(mask)
|
27 |
+
|
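# Sketch of the indexing above (coordinates are made up): where one
# (grid_row_u, grid_col_r) pair lands in the 2004x4008 mask.
grid_row_u, grid_col_r = 433, 1023
y = 2004 - (grid_row_u - (-1002)) - 1    # -> 568
x = grid_col_r - (-2004)                 # -> 3027
# mask[y, x] is set to 255 for this cell, provided its nodata fraction is <= 0.5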
28 |
+
def fig2img(fig):
|
29 |
+
"""Convert a Matplotlib figure to a PIL Image and return it"""
|
30 |
+
import io
|
31 |
+
buf = io.BytesIO()
|
32 |
+
fig.savefig(buf)
|
33 |
+
buf.seek(0)
|
34 |
+
img = PIL.Image.open(buf)
|
35 |
+
return img
|
36 |
+
|
37 |
+
def light_basemap():
|
38 |
+
"""
|
39 |
+
Bright coloured contours
|
40 |
+
"""
|
41 |
+
|
42 |
+
with plt.ioff():
|
43 |
+
fig, ax = plt.subplots(figsize=(48,24), dpi=167)
|
44 |
+
|
45 |
+
m = Basemap(projection='sinu', lat_0=0, lon_0=0, resolution='l', ax=ax)
|
46 |
+
m.fillcontinents(color="#9eba9b", lake_color='#CCDDFF')
|
47 |
+
m.drawmapboundary(fill_color="#CCDDFF")
|
48 |
+
m.drawcountries(color="#666666", linewidth=1)
|
49 |
+
m.drawcoastlines(color="#666666", linewidth=1)
|
50 |
+
|
51 |
+
plt.gca().set_axis_off()
|
52 |
+
plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0,
|
53 |
+
hspace = 0, wspace = 0)
|
54 |
+
plt.margins(0,0)
|
55 |
+
|
56 |
+
return fig2img(fig)
|
57 |
+
|
58 |
+
def dark_basemap():
|
59 |
+
"""
|
60 |
+
Dark contours
|
61 |
+
"""
|
62 |
+
|
63 |
+
with plt.ioff():
|
64 |
+
fig, ax = plt.subplots(figsize=(48,24), dpi=167)
|
65 |
+
|
66 |
+
m = Basemap(projection='sinu', lat_0=0, lon_0=0, resolution='l', ax=ax)
|
67 |
+
m.fillcontinents(color="#242424", lake_color='#242424')
|
68 |
+
m.drawmapboundary(fill_color="#242424")
|
69 |
+
m.drawcountries(color="#000000", linewidth=1)
|
70 |
+
m.drawcoastlines(color="#000000", linewidth=1)
|
71 |
+
|
72 |
+
plt.gca().set_axis_off()
|
73 |
+
plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0,
|
74 |
+
hspace = 0, wspace = 0)
|
75 |
+
plt.margins(0,0)
|
76 |
+
|
77 |
+
return fig2img(fig)
|
78 |
+
|
79 |
+
def get_coveragemap(input, input2=None):
|
80 |
+
"""
|
81 |
+
Creates a complete coloured Major TOM coverage figure in the same style as in the official documentation
|
82 |
+
|
83 |
+
Optionally, input2 can be provided; the map is then drawn with extra colours indicating cells available only in input (green) or only in input2 (blue)
|
84 |
+
"""
|
85 |
+
|
86 |
+
if input2 is None:
|
87 |
+
return single_coveragemap(input)
|
88 |
+
else:
|
89 |
+
cmap1 = single_coveragemap(input)
|
90 |
+
cmap2 = single_coveragemap(input2)
|
91 |
+
|
92 |
+
# arrays for mixing
|
93 |
+
inp1_arr = np.array(cmap1)[...,:3]
|
94 |
+
inp2_arr = np.array(cmap2)[...,:3]
|
95 |
+
|
96 |
+
common_arr = inp1_arr*(inp1_arr.sum(-1) == inp2_arr.sum(-1))[:,:,None]
|
97 |
+
common_arr[:,:,(1,2)] = 0
|
98 |
+
inp1_arr[:,:,(0,2)] = 0 # Green - indicates presence of S2 only
|
99 |
+
inp2_arr[:,:,(0,1)] = 0 # Blue - indicates presence of DEM only
|
100 |
+
|
101 |
+
return PIL.Image.fromarray(((common_arr + inp1_arr + inp2_arr)).astype(np.uint8))
|
102 |
+
|
103 |
+
|
104 |
+
def single_coveragemap(input):
|
105 |
+
"""
|
106 |
+
Creates a complete coloured Major TOM coverage figure in the same style as in the official documentation
|
107 |
+
"""
|
108 |
+
|
109 |
+
# compute mask if df is provided
|
110 |
+
if isinstance(input, pd.DataFrame):
|
111 |
+
mask = get_mask(input)
|
112 |
+
else:
|
113 |
+
mask = input
|
114 |
+
|
115 |
+
basemap = light_basemap()
|
116 |
+
basemap_d = dark_basemap()
|
117 |
+
|
118 |
+
outside_earth = np.array(basemap.convert('RGBA'))[:, :, 0] == 255
|
119 |
+
outside_earth = PIL.Image.fromarray(outside_earth)
|
120 |
+
|
121 |
+
mask = mask.resize(basemap.size, PIL.Image.NEAREST)
|
122 |
+
|
123 |
+
basemap.putalpha(mask)
|
124 |
+
|
125 |
+
# Mask outside of earth
|
126 |
+
basemap.paste(outside_earth, (0,0), outside_earth)
|
127 |
+
|
128 |
+
basemap_d.paste(basemap, (0,0), basemap)
|
129 |
+
|
130 |
+
return basemap_d
|
131 |
+
|
132 |
+
if __name__ == '__main__':
|
133 |
+
DATASET_NAME = 'Major-TOM/Core-S2L2A'
|
134 |
+
meta_path = 'https://huggingface.co/datasets/{}/resolve/main/metadata.parquet'.format(DATASET_NAME)
|
135 |
+
df = pd.read_parquet(meta_path)
|
136 |
+
|
137 |
+
# This is how you make a coverage figure!
|
138 |
+
coverage_img = get_coveragemap(df)
|
139 |
+
|
140 |
+
coverage_img.save('coverage-example.png', format='PNG')
|
141 |
+
|
142 |
+
# and this is how you can create an overlap map for 2 datasets!
|
143 |
+
DATASET_NAME = 'Major-TOM/Core-DEM'
|
144 |
+
meta_path = 'https://huggingface.co/datasets/{}/resolve/main/metadata.parquet'.format(DATASET_NAME)
|
145 |
+
dem_df = pd.read_parquet(meta_path)
|
146 |
+
|
147 |
+
coverage_img = get_coveragemap(df,dem_df)
|
148 |
+
|
149 |
+
coverage_img.save('overlap-coverage-example.png', format='PNG')
|
src/COP-GEN-Beta/majortom/download_world.py
ADDED
@@ -0,0 +1,1009 @@
1 |
+
import argparse
|
2 |
+
from shapely.geometry import box
|
3 |
+
from concurrent.futures import ThreadPoolExecutor
|
4 |
+
from pathlib import Path
|
5 |
+
import os
|
6 |
+
import geopandas as gpd
|
7 |
+
import pandas as pd
|
8 |
+
import pyarrow.parquet as pq
|
9 |
+
from typing import List, Dict, Set
|
10 |
+
import logging
|
11 |
+
import urllib.request
|
12 |
+
from concurrent import futures
|
13 |
+
import fsspec
|
14 |
+
from tqdm import tqdm
|
15 |
+
import tempfile
|
16 |
+
import time
|
17 |
+
import random
|
18 |
+
|
19 |
+
|
20 |
+
S2L2A_METADATA = ['grid_cell', 'grid_row_u', 'grid_col_r', 'product_id', 'timestamp', 'cloud_cover', 'nodata', 'centre_lat', 'centre_lon', 'crs', 'parquet_url', 'parquet_row', 'geometry']
|
21 |
+
S2L1C_METADATA = ['grid_cell', 'grid_row_u', 'grid_col_r', 'product_id', 'timestamp', 'cloud_cover', 'nodata', 'centre_lat', 'centre_lon', 'crs', 'parquet_url', 'parquet_row', 'geometry']
|
22 |
+
S1RTC_METADATA = ['grid_cell', 'grid_row_u', 'grid_col_r', 'product_id', 'timestamp', 'nodata', 'orbit_state', 'centre_lat', 'centre_lon', 'crs', 'parquet_url', 'parquet_row']
|
23 |
+
DEM_METADATA = ['grid_cell', 'grid_row_u', 'grid_col_r', 'nodata', 'max_val', 'min_val', 'centre_lat', 'centre_lon', 'crs', 'parquet_url', 'parquet_row', '__index_level_0__']
|
24 |
+
|
25 |
+
METADATA_COLUMNS = {
|
26 |
+
'Core-S2L2A': S2L2A_METADATA,
|
27 |
+
'Core-S2L1C': S2L1C_METADATA,
|
28 |
+
'Core-S1RTC': S1RTC_METADATA,
|
29 |
+
'Core-DEM': DEM_METADATA
|
30 |
+
}
|
31 |
+
|
32 |
+
CONTENT = {
|
33 |
+
'Core-S2L2A': ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12', 'cloud_mask'],
|
34 |
+
'Core-S2L1C': ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B10', 'B11', 'B12', 'cloud_mask'],
|
35 |
+
'Core-S1RTC': ['vv', 'vh'],
|
36 |
+
'Core-DEM': ['DEM', 'compressed']
|
37 |
+
}
|
38 |
+
|
39 |
+
# Default max workers for extraction (can be higher as it's CPU-bound)
|
40 |
+
MAX_WORKERS = 32
|
41 |
+
# Default max workers for download (more conservative to avoid network issues)
|
42 |
+
DEFAULT_DOWNLOAD_WORKERS = 8
|
43 |
+
|
44 |
+
def parse_args():
|
45 |
+
|
46 |
+
if "INTERACTIVE" in os.environ: # Set INTERACTIVE=1 when running manually
|
47 |
+
return argparse.Namespace(
|
48 |
+
data_dir="./data/majorTOM",
|
49 |
+
bbox=[-180.0, -90.0, 180.0, 90.0],
|
50 |
+
sources=['Core-S2L2A', 'Core-S2L1C', 'Core-S1RTC', 'Core-DEM'],
|
51 |
+
subset_name="world",
|
52 |
+
start_date="2017-01-01",
|
53 |
+
end_date="2025-01-01",
|
54 |
+
cloud_cover=[0, 10],
|
55 |
+
preview=True,
|
56 |
+
mode="full",
|
57 |
+
delete_parquets=False,
|
58 |
+
download_workers=DEFAULT_DOWNLOAD_WORKERS,
|
59 |
+
revalidate=False
|
60 |
+
)
|
61 |
+
else:
|
62 |
+
parser = argparse.ArgumentParser(description='Download satellite imagery from Major-TOM dataset')
|
63 |
+
parser.add_argument('--data-dir', type=str, default='./data/majorTOM',
|
64 |
+
help='Data directory for downloaded files')
|
65 |
+
parser.add_argument('--bbox', type=float, nargs=4,
|
66 |
+
default=[2.9559111595, 43.8179931641, 55.4920501709, 65.808380127],
|
67 |
+
help='Bounding box coordinates: minx miny maxx maxy')
|
68 |
+
parser.add_argument('--sources', type=str, nargs='+',
|
69 |
+
default=['Core-S2L2A', 'Core-S2L1C', 'Core-S1RTC'],
|
70 |
+
help='List of source names for the datasets')
|
71 |
+
parser.add_argument('--subset-name', type=str, required=True,
|
72 |
+
help='Name for the geographical subset being created')
|
73 |
+
parser.add_argument('--start-date', type=str, default='2017-01-01',
|
74 |
+
help='Start date for temporal range (YYYY-MM-DD)')
|
75 |
+
parser.add_argument('--end-date', type=str, default='2025-01-01',
|
76 |
+
help='End date for temporal range (YYYY-MM-DD)')
|
77 |
+
parser.add_argument('--cloud-cover', type=float, nargs=2, default=[0, 10],
|
78 |
+
help='Cloud cover range (min max)')
|
79 |
+
parser.add_argument('--criteria', type=str, default=None,
|
80 |
+
help='Criterion for timestamp deduplication. Currently only "latest" is supported')
|
81 |
+
parser.add_argument('--n-samples', type=int, default=None,
|
82 |
+
help='Number of samples to download')
|
83 |
+
parser.add_argument('--seed', type=int, default=None,
|
84 |
+
help='Random seed for reproducibility')
|
85 |
+
parser.add_argument('--preview', action='store_true',
|
86 |
+
help='If True, only print the number of samples for each source that will be downloaded')
|
87 |
+
parser.add_argument('--mode', type=str, choices=['full', 'download', 'extract'], default='full',
|
88 |
+
help='Mode of operation: full (download and extract), download (download parquets only), extract (extract from downloaded parquets)')
|
89 |
+
parser.add_argument('--delete-parquets', action='store_true',
|
90 |
+
help='Delete parquet files after extraction (only used with extract mode)')
|
91 |
+
parser.add_argument('--download-workers', type=int, default=DEFAULT_DOWNLOAD_WORKERS,
|
92 |
+
help=f'Number of parallel workers for downloading files. Default: {DEFAULT_DOWNLOAD_WORKERS}. Reduce this number if downloads are slow.')
|
93 |
+
parser.add_argument('--revalidate', action='store_true',
|
94 |
+
help='Force revalidation of all parquet files and redownload if corrupted')
|
95 |
+
return parser.parse_args()
|
96 |
+
|
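# Usage sketch (illustrative): setting INTERACTIVE makes parse_args() return the
# hard-coded Namespace above instead of parsing CLI flags. A normal run would instead
# look roughly like:
#   python download_world.py --subset-name rome --bbox 12.3 41.7 12.7 42.0 --mode download
# (the bbox values here are made up).
import os
os.environ["INTERACTIVE"] = "1"
args = parse_args()
print(args.subset_name, args.bbox)   # "world", [-180.0, -90.0, 180.0, 90.0]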
97 |
+
|
98 |
+
def fix_crs(df):
|
99 |
+
if df['crs'].iloc[0].startswith('EPSG:EPSG:'):
|
100 |
+
df['crs'] = df['crs'].str.replace('EPSG:EPSG:', 'EPSG:', regex=False)
|
101 |
+
return df
|
102 |
+
|
103 |
+
def my_filter_metadata(df,
|
104 |
+
region=None,
|
105 |
+
daterange=None,
|
106 |
+
cloud_cover=(0,100),
|
107 |
+
nodata=(0, 1.0)
|
108 |
+
):
|
109 |
+
"""Filters the Major-TOM dataframe based on several parameters
|
110 |
+
|
111 |
+
Args:
|
112 |
+
df (geopandas dataframe): Parent dataframe
|
113 |
+
region (shapely geometry object) : Region of interest
|
114 |
+
daterange (tuple) : Inclusive range of dates (example format: '2020-01-01')
|
115 |
+
cloud_cover (tuple) : Inclusive percentage range (0-100) of cloud cover
|
116 |
+
nodata (tuple) : Inclusive fraction (0.0-1.0) of no data allowed in a sample
|
117 |
+
|
118 |
+
Returns:
|
119 |
+
df: a filtered dataframe
|
120 |
+
"""
|
121 |
+
# temporal filtering
|
122 |
+
if daterange is not None and 'timestamp' in df.columns:
|
123 |
+
assert (isinstance(daterange, list) or isinstance(daterange, tuple)) and len(daterange)==2
|
124 |
+
df = df[df.timestamp >= daterange[0]]
|
125 |
+
df = df[df.timestamp <= daterange[1]]
|
126 |
+
|
127 |
+
# spatial filtering
|
128 |
+
if region is not None:
|
129 |
+
idxs = df.sindex.query(region)
|
130 |
+
df = df.take(idxs)
|
131 |
+
# cloud filtering
|
132 |
+
if cloud_cover is not None:
|
133 |
+
df = df[df.cloud_cover >= cloud_cover[0]]
|
134 |
+
df = df[df.cloud_cover <= cloud_cover[1]]
|
135 |
+
|
136 |
+
# nodata filtering
|
137 |
+
if nodata is not None:
|
138 |
+
df = df[df.nodata >= nodata[0]]
|
139 |
+
df = df[df.nodata <= nodata[1]]
|
140 |
+
|
141 |
+
return df
|
142 |
+
|
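# Usage sketch (illustrative values): filter a Major TOM metadata GeoDataFrame to a
# bounding box, a date range and low cloud cover using my_filter_metadata().
from shapely.geometry import box
region = box(12.3, 41.7, 12.7, 42.0)          # minx, miny, maxx, maxy (made up)
filtered = my_filter_metadata(
    gdf,                                      # GeoDataFrame, e.g. from get_metadata()
    region=region,
    daterange=('2020-01-01', '2021-01-01'),
    cloud_cover=(0, 10),
    nodata=(0.0, 0.0),
)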
143 |
+
def my_filter_download(df, local_dir, source_name, by_row=False, verbose=False, tif_columns=None, download_workers=DEFAULT_DOWNLOAD_WORKERS):
|
144 |
+
"""Downloads and unpacks the data of Major-TOM based on a metadata dataframe"""
|
145 |
+
if isinstance(local_dir, str):
|
146 |
+
local_dir = Path(local_dir)
|
147 |
+
|
148 |
+
# identify all parquets that need to be downloaded (group them)
|
149 |
+
urls = df.parquet_url.unique()
|
150 |
+
print(f'Starting parallel download of {len(urls)} parquet files.') if verbose else None
|
151 |
+
|
152 |
+
def process_parquet(url):
|
153 |
+
# Create a unique temporary file for each thread
|
154 |
+
temp_file = tempfile.NamedTemporaryFile(suffix=".parquet", dir=local_dir).name
|
155 |
+
|
156 |
+
# identify all relevant rows for this parquet
|
157 |
+
rows = df[df.parquet_url == url].parquet_row.unique()
|
158 |
+
|
159 |
+
max_retries = 3
|
160 |
+
retry_delay = 5 # seconds
|
161 |
+
success = False
|
162 |
+
last_error = None
|
163 |
+
|
164 |
+
for attempt in range(max_retries):
|
165 |
+
try:
|
166 |
+
if not by_row:
|
167 |
+
# Create an opener with a longer timeout
|
168 |
+
opener = urllib.request.build_opener()
|
169 |
+
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
|
170 |
+
urllib.request.install_opener(opener)
|
171 |
+
|
172 |
+
# Download with timeout using urlopen (30 minutes timeout)
|
173 |
+
with urllib.request.urlopen(url, timeout=1800) as response:
|
174 |
+
with open(temp_file, 'wb') as out_file:
|
175 |
+
out_file.write(response.read())
|
176 |
+
temp_path = temp_file
|
177 |
+
else:
|
178 |
+
f = fsspec.open(url)
|
179 |
+
temp_path = f.open()
|
180 |
+
|
181 |
+
# Process the downloaded parquet file
|
182 |
+
try:
|
183 |
+
with pq.ParquetFile(temp_path) as pf:
|
184 |
+
for row_idx in rows:
|
185 |
+
table = pf.read_row_group(row_idx)
|
186 |
+
|
187 |
+
product_id = table['product_id'][0].as_py() if 'product_id' in table.column_names else "id"
|
188 |
+
grid_cell = table['grid_cell'][0].as_py()
|
189 |
+
row = grid_cell.split('_')[0]
|
190 |
+
|
191 |
+
dest = local_dir / Path(f"{source_name}/{row}/{grid_cell}/{product_id}")
|
192 |
+
dest.mkdir(exist_ok=True, parents=True)
|
193 |
+
|
194 |
+
if tif_columns == 'all':
|
195 |
+
columns = [col for col in table.column_names if col[0] == 'B']
|
196 |
+
if source_name in ['Core-S2L1C', 'Core-S2L2A']:
|
197 |
+
columns.append('cloud_mask')
|
198 |
+
elif tif_columns is None:
|
199 |
+
columns = []
|
200 |
+
else:
|
201 |
+
columns = tif_columns
|
202 |
+
|
203 |
+
# Save tifs
|
204 |
+
for col in columns:
|
205 |
+
with open(dest / f"{col}.tif", "wb") as f:
|
206 |
+
f.write(table[col][0].as_py())
|
207 |
+
|
208 |
+
# Save thumbnail
|
209 |
+
with open(dest / "thumbnail.png", "wb") as f:
|
210 |
+
f.write(table['thumbnail'][0].as_py())
|
211 |
+
|
212 |
+
success = True
|
213 |
+
break # Successfully processed the file, exit retry loop
|
214 |
+
|
215 |
+
except Exception as e:
|
216 |
+
last_error = f"Error processing parquet content: {str(e)}"
|
217 |
+
if attempt < max_retries - 1:
|
218 |
+
print(f"Error processing parquet content for {url}, attempt {attempt + 1}/{max_retries}: {str(e)}")
|
219 |
+
time.sleep(retry_delay)
|
220 |
+
continue
|
221 |
+
|
222 |
+
finally:
|
223 |
+
# Cleanup
|
224 |
+
if not by_row:
|
225 |
+
try:
|
226 |
+
os.remove(temp_path)
|
227 |
+
except:
|
228 |
+
pass
|
229 |
+
else:
|
230 |
+
try:
|
231 |
+
f.close()
|
232 |
+
except:
|
233 |
+
pass
|
234 |
+
|
235 |
+
except urllib.error.HTTPError as e:
|
236 |
+
last_error = f"HTTP Error {e.code}: {str(e)}"
|
237 |
+
if e.code == 504 and attempt < max_retries - 1:
|
238 |
+
print(f"Timeout error for {url}, attempt {attempt + 1}/{max_retries}. Retrying in {retry_delay} seconds...")
|
239 |
+
time.sleep(retry_delay)
|
240 |
+
continue
|
241 |
+
except Exception as e:
|
242 |
+
last_error = str(e)
|
243 |
+
if attempt < max_retries - 1:
|
244 |
+
print(f"Error downloading {url}, attempt {attempt + 1}/{max_retries}: {str(e)}")
|
245 |
+
time.sleep(retry_delay)
|
246 |
+
continue
|
247 |
+
|
248 |
+
return {
|
249 |
+
'url': url,
|
250 |
+
'success': success,
|
251 |
+
'error': last_error if not success else None
|
252 |
+
}
|
253 |
+
|
254 |
+
# Use ThreadPoolExecutor for parallel downloads
|
255 |
+
# max_workers = min(len(urls), MAX_WORKERS*4) # Use more workers since it's I/O bound
|
256 |
+
max_workers = min(len(urls), download_workers)
|
257 |
+
print(f"Using {max_workers} workers for parallel downloads") if verbose else None
|
258 |
+
results = []
|
259 |
+
|
260 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
261 |
+
future_to_url = {executor.submit(process_parquet, url): url for url in urls}
|
262 |
+
|
263 |
+
for future in tqdm(
|
264 |
+
futures.as_completed(future_to_url),
|
265 |
+
total=len(urls),
|
266 |
+
desc=f'Downloading {source_name} parquets'
|
267 |
+
):
|
268 |
+
results.append(future.result())
|
269 |
+
|
270 |
+
# Process results and handle failures
|
271 |
+
failed_downloads = [r for r in results if not r['success']]
|
272 |
+
if failed_downloads:
|
273 |
+
print(f"\nWarning: Failed to download {len(failed_downloads)} parquet files for {source_name}")
|
274 |
+
print("\nFailed downloads:")
|
275 |
+
for fail in failed_downloads:
|
276 |
+
print(f"URL: {fail['url']}")
|
277 |
+
print(f"Error: {fail['error']}")
|
278 |
+
print("---")
|
279 |
+
raise RuntimeError(f"Some parquet files failed to download for {source_name}. Please retry the download.")
|
280 |
+
|
281 |
+
print(f"Successfully downloaded and processed {len(urls) - len(failed_downloads)} parquet files for {source_name}")
|
282 |
+
|
283 |
+
|
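# For reference, the extraction above writes each sample to
# {local_dir}/{source_name}/{row}/{grid_cell}/{product_id}/, holding the selected
# band .tif files plus thumbnail.png; an illustrative destination (all names made up):
from pathlib import Path
dest = Path('./data/majorTOM/rome') / 'Core-S2L2A' / '433U' / '433U_1023R' / '<product_id>'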
284 |
+
def my_metadata_from_url(access_url, local_url):
|
285 |
+
local_url, response = urllib.request.urlretrieve(access_url, local_url)
|
286 |
+
df = pq.read_table(local_url).to_pandas()
|
287 |
+
if 'timestamp' in df.columns:
|
288 |
+
df['timestamp'] = pd.to_datetime(df.timestamp)
|
289 |
+
df = fix_crs(df) # Fix CRS typo if present
|
290 |
+
gdf = gpd.GeoDataFrame(
|
291 |
+
df, geometry=gpd.points_from_xy(df.centre_lon, df.centre_lat), crs=df.crs.iloc[0]
|
292 |
+
)
|
293 |
+
return gdf
|
294 |
+
|
295 |
+
def get_metadata(source: str, output_dir: Path) -> gpd.GeoDataFrame:
|
296 |
+
"""Fetch metadata from HuggingFace dataset for a specific source"""
|
297 |
+
access_url = f"https://huggingface.co/datasets/Major-TOM/{source}/resolve/main/metadata.parquet?download=true"
|
298 |
+
local_url = output_dir / source / "metadata.parquet"
|
299 |
+
local_url.parent.mkdir(exist_ok=True, parents=True)
|
300 |
+
|
301 |
+
if local_url.exists():
|
302 |
+
print(f"Using cached metadata for {source}")
|
303 |
+
df = pq.read_table(local_url).to_pandas()
|
304 |
+
if 'timestamp' in df.columns:
|
305 |
+
df['timestamp'] = pd.to_datetime(df.timestamp)
|
306 |
+
df = fix_crs(df)
|
307 |
+
gdf = gpd.GeoDataFrame(
|
308 |
+
df, geometry=gpd.points_from_xy(df.centre_lon, df.centre_lat), crs=df.crs.iloc[0]
|
309 |
+
)
|
310 |
+
else:
|
311 |
+
print(f"Downloading metadata for {source}...")
|
312 |
+
gdf = my_metadata_from_url(access_url, local_url)
|
313 |
+
|
314 |
+
return gdf
|
315 |
+
|
316 |
+
|
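# Usage sketch: fetch (or reuse the cached copy of) the metadata parquet for one
# source and get it back as a GeoDataFrame.
from pathlib import Path
gdf = get_metadata('Core-S2L2A', Path('./data/majorTOM'))
print(len(gdf), list(gdf.columns)[:5])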
317 |
+
def filter_data(gdf, bbox, cloud_cover, date_range):
|
318 |
+
"""Filter metadata based on given parameters"""
|
319 |
+
region = box(*bbox)
|
320 |
+
return my_filter_metadata(
|
321 |
+
gdf,
|
322 |
+
cloud_cover=cloud_cover,
|
323 |
+
region=region,
|
324 |
+
daterange=date_range,
|
325 |
+
nodata=(0.0, 0.0)
|
326 |
+
)
|
327 |
+
|
328 |
+
|
329 |
+
def find_common_samples(filtered_dfs: Dict[str, gpd.GeoDataFrame]) -> Dict[str, gpd.GeoDataFrame]:
|
330 |
+
"""Find samples that share common grid cells across all datasets"""
|
331 |
+
# Create sets of grid_cells for each dataset
|
332 |
+
grid_cell_sets = {
|
333 |
+
source: set(df['grid_cell'].unique())
|
334 |
+
for source, df in filtered_dfs.items()
|
335 |
+
}
|
336 |
+
|
337 |
+
# Find intersection of all grid cell sets
|
338 |
+
common_grid_cells = set.intersection(*grid_cell_sets.values())
|
339 |
+
print(f"\033[92mFound {len(common_grid_cells)} common grid cells across all sources\033[0m")
|
340 |
+
|
341 |
+
# Filter dataframes to keep only rows with common grid cells
|
342 |
+
filtered_common = {}
|
343 |
+
for source, df in filtered_dfs.items():
|
344 |
+
filtered_common[source] = df[df['grid_cell'].isin(common_grid_cells)]
|
345 |
+
print(f"{source}: {len(filtered_common[source])} samples for common grid cells")
|
346 |
+
|
347 |
+
return filtered_common
|
348 |
+
|
349 |
+
|
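# The intersection above, reduced to plain sets (grid cells are made up):
grid_cells = {
    'Core-S2L2A': {'433U_1023R', '434U_1023R'},
    'Core-DEM': {'433U_1023R'},
}
common = set.intersection(*grid_cells.values())
print(common)   # {'433U_1023R'}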
350 |
+
def download_source_files(df: gpd.GeoDataFrame, output_dir: Path, source: str, mode: str = 'full', delete_parquets: bool = False, download_workers: int = DEFAULT_DOWNLOAD_WORKERS, revalidate: bool = False):
|
351 |
+
"""Download files for a specific source"""
|
352 |
+
print(f"Processing files for {source}...")
|
353 |
+
|
354 |
+
if mode == 'download':
|
355 |
+
# Only download parquet files without extracting
|
356 |
+
download_parquet_files(
|
357 |
+
df,
|
358 |
+
local_dir=output_dir,
|
359 |
+
source_name=source,
|
360 |
+
download_workers=download_workers,
|
361 |
+
revalidate=revalidate,
|
362 |
+
verbose=True
|
363 |
+
)
|
364 |
+
elif mode == 'extract':
|
365 |
+
# Extract data from already downloaded parquet files
|
366 |
+
extract_from_parquet_files(
|
367 |
+
df,
|
368 |
+
local_dir=output_dir,
|
369 |
+
source_name=source,
|
370 |
+
delete_parquets=delete_parquets,
|
371 |
+
verbose=True,
|
372 |
+
tif_columns=CONTENT[source]
|
373 |
+
)
|
374 |
+
else: # mode == 'full'
|
375 |
+
# Use the original function for backwards compatibility
|
376 |
+
my_filter_download(
|
377 |
+
df,
|
378 |
+
local_dir=output_dir,
|
379 |
+
source_name=source,
|
380 |
+
by_row=False,
|
381 |
+
verbose=True,
|
382 |
+
tif_columns=CONTENT[source],
|
383 |
+
download_workers=download_workers
|
384 |
+
)
|
385 |
+
|
386 |
+
def get_and_filter_source(args, source: str, data_dir: Path) -> gpd.GeoDataFrame:
|
387 |
+
"""Process a single source: get metadata and filter it"""
|
388 |
+
source_dir = data_dir / source
|
389 |
+
source_dir.mkdir(exist_ok=True, parents=True)
|
390 |
+
|
391 |
+
# Get and filter metadata for each source
|
392 |
+
gdf = get_metadata(source, data_dir)
|
393 |
+
|
394 |
+
# Only apply cloud cover filter for Sentinel-2 sources
|
395 |
+
cloud_cover_filter = tuple(args.cloud_cover) if source.startswith('Core-S2') else None
|
396 |
+
|
397 |
+
filtered_df = filter_data(
|
398 |
+
gdf,
|
399 |
+
bbox=args.bbox,
|
400 |
+
cloud_cover=cloud_cover_filter,
|
401 |
+
date_range=(args.start_date, args.end_date)
|
402 |
+
)
|
403 |
+
print(f"Found {len(filtered_df)} samples for {source} in the specified region")
|
404 |
+
return filtered_df
|
405 |
+
|
406 |
+
def download_source_parallel(source_df_tuple: tuple, subset_dir: Path, mode: str = 'full', delete_parquets: bool = False, download_workers: int = DEFAULT_DOWNLOAD_WORKERS, revalidate: bool = False):
|
407 |
+
"""Download files for a source sequentially, with resume capability"""
|
408 |
+
source, df = source_df_tuple
|
409 |
+
source_subset_dir = subset_dir / source
|
410 |
+
source_subset_dir.mkdir(exist_ok=True, parents=True)
|
411 |
+
|
412 |
+
# Save filtered metadata
|
413 |
+
metadata_path = source_subset_dir / "metadata.parquet"
|
414 |
+
df.to_parquet(metadata_path)
|
415 |
+
print(f"Saved filtered metadata for {source} to {metadata_path}")
|
416 |
+
|
417 |
+
# If we're only downloading parquet files, we don't need to check for existing tif files
|
418 |
+
if mode == 'download':
|
419 |
+
download_source_files(df, subset_dir, source, mode=mode, delete_parquets=delete_parquets, download_workers=download_workers, revalidate=revalidate)
|
420 |
+
print(f"Completed parquet downloads for {source}")
|
421 |
+
return
|
422 |
+
|
423 |
+
# If we're extracting and the extraction metadata exists, we don't need to check for existing files
|
424 |
+
parquet_dir = subset_dir / source / "parquets"
|
425 |
+
extraction_file = parquet_dir / "extraction_metadata.parquet"
|
426 |
+
filtered_df_file = parquet_dir / "filtered_df.parquet"
|
427 |
+
|
428 |
+
if mode == 'extract' and extraction_file.exists() and filtered_df_file.exists():
|
429 |
+
print(f"Using saved extraction metadata for {source}")
|
430 |
+
download_source_files(df, subset_dir, source, mode=mode, delete_parquets=delete_parquets, download_workers=download_workers, revalidate=revalidate)
|
431 |
+
print(f"Completed extraction for {source}")
|
432 |
+
return
|
433 |
+
|
434 |
+
# Filter out already processed grid cells more efficiently
|
435 |
+
def get_existing_files(df, subset_dir, source):
|
436 |
+
# Create all possible paths
|
437 |
+
# For DEM, use 'id' as product_id, for other sources use actual product_id
|
438 |
+
product_ids = df['product_id'] if 'product_id' in df.columns else pd.Series(['id'] * len(df))
|
439 |
+
grid_cells = df['grid_cell']
|
440 |
+
row_dirs = grid_cells.str.split('_').str[0]
|
441 |
+
|
442 |
+
# Vectorized path creation
|
443 |
+
paths = [
|
444 |
+
subset_dir / source / row_dir / grid_cell / product_id / "thumbnail.png"
|
445 |
+
for row_dir, grid_cell, product_id in zip(row_dirs, grid_cells, product_ids)
|
446 |
+
]
|
447 |
+
|
448 |
+
# Batch existence check
|
449 |
+
exists_mask = [path.exists() for path in tqdm(paths, desc=f"Checking existing files for {source}", unit="file")]
|
450 |
+
return pd.Series(exists_mask, index=df.index)
|
451 |
+
|
452 |
+
# Create mask of unprocessed files
|
453 |
+
exists_mask = get_existing_files(df, subset_dir, source)
|
454 |
+
df_to_process = df[~exists_mask]
|
455 |
+
|
456 |
+
if len(df_to_process) == 0:
|
457 |
+
print(f"All files for {source} are already processed. Skipping.")
|
458 |
+
return
|
459 |
+
|
460 |
+
print(f"Found {len(df) - len(df_to_process)} already processed files")
|
461 |
+
print(f"Processing remaining {len(df_to_process)} files for {source}...")
|
462 |
+
|
463 |
+
# Process the remaining data files
|
464 |
+
download_source_files(df_to_process, subset_dir, source, mode=mode, delete_parquets=delete_parquets, download_workers=download_workers, revalidate=revalidate)
|
465 |
+
print(f"Completed processing for {source}")
|
466 |
+
|
467 |
+
def remove_duplicates(common_dfs: Dict[str, gpd.GeoDataFrame],
|
468 |
+
criteria: str = None) -> Dict[str, gpd.GeoDataFrame]:
|
469 |
+
"""Remove duplicates from common dataframes based on source-specific relevant columns."""
|
470 |
+
for source, df in common_dfs.items():
|
471 |
+
num_rows = len(df)
|
472 |
+
|
473 |
+
if 'timestamp' in df.columns:
|
474 |
+
if criteria == "latest":
|
475 |
+
# Sort by timestamp and keep the latest
|
476 |
+
df = df.sort_values(by='timestamp', ascending=False)
|
477 |
+
elif criteria is None:
|
478 |
+
raise ValueError("Please specify a criterion for deduplication. Currently we do not support multiple timestamps for the same grid_cell.")
|
479 |
+
else:
|
480 |
+
raise ValueError("Criteria not supported")
|
481 |
+
|
482 |
+
# TODO:
|
483 |
+
# Product_id includes the timestamp.
|
484 |
+
# We ignore one of the two orbit_states to avoid duplicates.
|
485 |
+
# We can also ignore cloud_cover since we have already filtered by cloud_cover
|
486 |
+
# We also ignore crs. Apparently, there are rows that are entirely duplicates except for the crs (which is odd)
|
487 |
+
# We also ignore centre_lat and centre_lon since they are not always aligned
|
488 |
+
subset_columns = [col for col in df.columns if col not in [
|
489 |
+
'parquet_row', 'parquet_url', 'geometry', 'timestamp', 'product_id',
|
490 |
+
'orbit_state', 'cloud_cover', 'crs', 'centre_lat', 'centre_lon'
|
491 |
+
]]
|
492 |
+
df = df.drop_duplicates(subset=subset_columns)
|
493 |
+
|
494 |
+
# Verify no remaining duplicates in grid_cell
|
495 |
+
if df['grid_cell'].duplicated().any():
|
496 |
+
print(df[df['grid_cell'].duplicated()])
|
497 |
+
raise ValueError(f"Found rows with duplicate grid_cells but different values in source {source}")
|
498 |
+
|
499 |
+
common_dfs[source] = df
|
500 |
+
print(f"\033[94mDropped {num_rows - len(df)} duplicates from {source}\033[0m")
|
501 |
+
|
502 |
+
return common_dfs
|
503 |
+
|
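# Sketch of the criteria="latest" behaviour on a toy dataframe: sorting by timestamp
# (newest first) before drop_duplicates keeps the most recent row per grid cell.
# (The real function deduplicates on a broader column subset than this sketch.)
import pandas as pd
toy = pd.DataFrame({
    'grid_cell': ['433U_1023R', '433U_1023R'],
    'timestamp': pd.to_datetime(['2020-01-01', '2021-06-01']),
})
latest = toy.sort_values('timestamp', ascending=False).drop_duplicates(subset=['grid_cell'])
print(latest['timestamp'].iloc[0])   # 2021-06-01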
504 |
+
def sample_common_dfs(common_dfs: Dict[str, gpd.GeoDataFrame], n_samples: int, seed: int) -> Dict[str, gpd.GeoDataFrame]:
|
505 |
+
"""Sample common dataframes to have n_samples samples per source"""
|
506 |
+
# Get all unique grid cells that appear in all dataframes
|
507 |
+
grid_cells_sets = [set(df['grid_cell'].unique()) for df in common_dfs.values()]
|
508 |
+
all_grid_cells = list(set.intersection(*grid_cells_sets))
|
509 |
+
if not all_grid_cells:
|
510 |
+
raise ValueError("No common grid cells found across all sources")
|
511 |
+
|
512 |
+
# Sort grid cells for reproducibility before sampling
|
513 |
+
all_grid_cells.sort()
|
514 |
+
|
515 |
+
# Randomly sample grid cells
|
516 |
+
random.seed(seed)
|
517 |
+
sampled_grid_cells = set(random.sample(all_grid_cells, min(n_samples, len(all_grid_cells))))
|
518 |
+
|
519 |
+
# Filter each dataframe to only include the sampled grid cells
|
520 |
+
result = {}
|
521 |
+
for source, df in common_dfs.items():
|
522 |
+
result[source] = df[df['grid_cell'].isin(sampled_grid_cells)]
|
523 |
+
print(f"Sampled {len(result[source])} rows for {source}")
|
524 |
+
|
525 |
+
return result
|
526 |
+
|
527 |
+
def is_valid_parquet(parquet_path):
|
528 |
+
"""
|
529 |
+
Checks if a parquet file is valid and not empty.
|
530 |
+
|
531 |
+
Args:
|
532 |
+
parquet_path: Path to the parquet file
|
533 |
+
|
534 |
+
Returns:
|
535 |
+
bool: True if the parquet file is valid, False otherwise
|
536 |
+
"""
|
537 |
+
try:
|
538 |
+
# Check if file exists and has a non-zero size (not empty)
|
539 |
+
if not os.path.exists(parquet_path) or os.path.getsize(parquet_path) == 0:
|
540 |
+
return False
|
541 |
+
|
542 |
+
# Try to open and read metadata from the parquet file
|
543 |
+
with pq.ParquetFile(parquet_path) as pf:
|
544 |
+
# Check if there's at least one row group
|
545 |
+
if pf.num_row_groups == 0:
|
546 |
+
return False
|
547 |
+
|
548 |
+
# Try to read metadata of the first row group to verify basic integrity
|
549 |
+
pf.metadata
|
550 |
+
|
551 |
+
# Optionally, try reading a small sample of data to further verify
|
552 |
+
table = pf.read_row_group(0, columns=['grid_cell'])
|
553 |
+
|
554 |
+
return True
|
555 |
+
except Exception as e:
|
556 |
+
print(f"Error validating parquet file {parquet_path}: {str(e)}")
|
557 |
+
return False
|
558 |
+
|
559 |
+
def download_parquet_files(df, local_dir, source_name, download_workers=DEFAULT_DOWNLOAD_WORKERS, revalidate=False, verbose=False):
|
560 |
+
"""Downloads only the parquet files without extracting data, saving them to disk"""
|
561 |
+
if isinstance(local_dir, str):
|
562 |
+
local_dir = Path(local_dir)
|
563 |
+
|
564 |
+
# Create a directory to store parquet files
|
565 |
+
parquet_dir = local_dir / source_name / "parquets"
|
566 |
+
parquet_dir.mkdir(exist_ok=True, parents=True)
|
567 |
+
|
568 |
+
# Identify all parquets that need to be downloaded
|
569 |
+
urls = df.parquet_url.unique()
|
570 |
+
print(f'Starting parallel download of {len(urls)} parquet files.') if verbose else None
|
571 |
+
|
572 |
+
def download_parquet(url):
|
573 |
+
# Get the filename from the URL
|
574 |
+
filename = url.split('/')[-1].split('?')[0]
|
575 |
+
parquet_path = parquet_dir / filename
|
576 |
+
|
577 |
+
# Skip if file already exists and is valid (and we're not forcing revalidation)
|
578 |
+
if parquet_path.exists() and not revalidate:
|
579 |
+
if is_valid_parquet(parquet_path):
|
580 |
+
return {
|
581 |
+
'url': url,
|
582 |
+
'path': parquet_path,
|
583 |
+
'success': True,
|
584 |
+
'error': None,
|
585 |
+
'skipped': True
|
586 |
+
}
|
587 |
+
else:
|
588 |
+
# File exists but is corrupted or empty, delete it for redownload
|
589 |
+
print(f"Found corrupted or invalid parquet file: {parquet_path}. Will redownload.")
|
590 |
+
try:
|
591 |
+
os.remove(parquet_path)
|
592 |
+
except Exception as e:
|
593 |
+
print(f"Warning: Failed to delete corrupted file {parquet_path}: {str(e)}")
|
594 |
+
elif parquet_path.exists() and revalidate:
|
595 |
+
# If we're revalidating, check the file and delete if invalid
|
596 |
+
if not is_valid_parquet(parquet_path):
|
597 |
+
print(f"Revalidation: Found corrupted parquet file: {parquet_path}. Will redownload.")
|
598 |
+
try:
|
599 |
+
os.remove(parquet_path)
|
600 |
+
except Exception as e:
|
601 |
+
print(f"Warning: Failed to delete corrupted file {parquet_path}: {str(e)}")
|
602 |
+
else:
|
603 |
+
# File is valid, skip download
|
604 |
+
print(f"Revalidation: Confirmed valid parquet file: {parquet_path}")
|
605 |
+
return {
|
606 |
+
'url': url,
|
607 |
+
'path': parquet_path,
|
608 |
+
'success': True,
|
609 |
+
'error': None,
|
610 |
+
'skipped': True
|
611 |
+
}
|
612 |
+
|
613 |
+
max_retries = 3
|
614 |
+
retry_delay = 5 # seconds
|
615 |
+
success = False
|
616 |
+
last_error = None
|
617 |
+
|
618 |
+
for attempt in range(max_retries):
|
619 |
+
try:
|
620 |
+
# Create an opener with a longer timeout
|
621 |
+
opener = urllib.request.build_opener()
|
622 |
+
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
|
623 |
+
urllib.request.install_opener(opener)
|
624 |
+
|
625 |
+
# Download with timeout using urlopen (30 minutes timeout)
|
626 |
+
with urllib.request.urlopen(url, timeout=1800) as response:
|
627 |
+
with open(parquet_path, 'wb') as out_file:
|
628 |
+
out_file.write(response.read())
|
629 |
+
|
630 |
+
# Verify the downloaded file is valid
|
631 |
+
if not is_valid_parquet(parquet_path):
|
632 |
+
last_error = "Downloaded file is corrupted or invalid"
|
633 |
+
if attempt < max_retries - 1:
|
634 |
+
print(f"Error: Downloaded parquet file is corrupted, attempt {attempt + 1}/{max_retries}. Retrying...")
|
635 |
+
os.remove(parquet_path)
|
636 |
+
time.sleep(retry_delay)
|
637 |
+
continue
|
638 |
+
|
639 |
+
success = True
|
640 |
+
break # Successfully downloaded, exit retry loop
|
641 |
+
|
642 |
+
except urllib.error.HTTPError as e:
|
643 |
+
last_error = f"HTTP Error {e.code}: {str(e)}"
|
644 |
+
if e.code == 504 and attempt < max_retries - 1:
|
645 |
+
print(f"Timeout error for {url}, attempt {attempt + 1}/{max_retries}. Retrying in {retry_delay} seconds...")
|
646 |
+
time.sleep(retry_delay)
|
647 |
+
continue
|
648 |
+
except Exception as e:
|
649 |
+
last_error = str(e)
|
650 |
+
if attempt < max_retries - 1:
|
651 |
+
print(f"Error downloading {url}, attempt {attempt + 1}/{max_retries}: {str(e)}")
|
652 |
+
time.sleep(retry_delay)
|
653 |
+
continue
|
654 |
+
# Make sure the file is deleted if it was partially downloaded
|
655 |
+
if parquet_path.exists():
|
656 |
+
try:
|
657 |
+
os.remove(parquet_path)
|
658 |
+
except:
|
659 |
+
pass
|
660 |
+
|
661 |
+
return {
|
662 |
+
'url': url,
|
663 |
+
'path': parquet_path if success else None,
|
664 |
+
'success': success,
|
665 |
+
'error': last_error if not success else None,
|
666 |
+
'skipped': False
|
667 |
+
}
|
668 |
+
|
669 |
+
# Use ThreadPoolExecutor for parallel downloads with the specified number of workers
|
670 |
+
max_workers = min(len(urls), download_workers)
|
671 |
+
print(f"Using {max_workers} workers for parallel downloads") if verbose else None
|
672 |
+
results = []
|
673 |
+
|
674 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
675 |
+
future_to_url = {executor.submit(download_parquet, url): url for url in urls}
|
676 |
+
|
677 |
+
for future in tqdm(
|
678 |
+
futures.as_completed(future_to_url),
|
679 |
+
total=len(urls),
|
680 |
+
desc=f'Downloading {source_name} parquets'
|
681 |
+
):
|
682 |
+
results.append(future.result())
|
683 |
+
|
684 |
+
# Process results and handle failures
|
685 |
+
failed_downloads = [r for r in results if not r['success']]
|
686 |
+
skipped_downloads = [r for r in results if r['skipped']]
|
687 |
+
|
688 |
+
if failed_downloads:
|
689 |
+
print(f"\nWarning: Failed to download {len(failed_downloads)} parquet files for {source_name}")
|
690 |
+
print("\nFailed downloads:")
|
691 |
+
for fail in failed_downloads:
|
692 |
+
print(f"URL: {fail['url']}")
|
693 |
+
print(f"Error: {fail['error']}")
|
694 |
+
print("---")
|
695 |
+
raise RuntimeError(f"Some parquet files failed to download for {source_name}. Please retry the download.")
|
696 |
+
|
697 |
+
print(f"Successfully downloaded {len(results) - len(failed_downloads) - len(skipped_downloads)} parquet files for {source_name}")
|
698 |
+
print(f"Skipped {len(skipped_downloads)} valid existing parquet files")
|
699 |
+
|
700 |
+
# Create a mapping from URLs to local file paths
|
701 |
+
url_to_path = {r['url']: r['path'] for r in results if r['success']}
|
702 |
+
|
703 |
+
# Save the URL to path mapping
|
704 |
+
mapping_file = parquet_dir / "url_to_path.parquet"
|
705 |
+
mapping_df = pd.DataFrame({
|
706 |
+
'url': list(url_to_path.keys()),
|
707 |
+
'path': [str(path) for path in url_to_path.values()]
|
708 |
+
})
|
709 |
+
mapping_df.to_parquet(mapping_file)
|
710 |
+
|
711 |
+
# Save the extraction metadata - creating a dataframe that maps URLs to the rows that need to be extracted
|
712 |
+
extraction_meta = []
|
713 |
+
for url in urls:
|
714 |
+
rows = df[df.parquet_url == url].parquet_row.unique()
|
715 |
+
for row in rows:
|
716 |
+
extraction_meta.append({
|
717 |
+
'url': url,
|
718 |
+
'row': row
|
719 |
+
})
|
720 |
+
|
721 |
+
extraction_df = pd.DataFrame(extraction_meta)
|
722 |
+
extraction_file = parquet_dir / "extraction_metadata.parquet"
|
723 |
+
extraction_df.to_parquet(extraction_file)
|
724 |
+
|
725 |
+
# Also save the full filtered dataframe for reference
|
726 |
+
filtered_df_file = parquet_dir / "filtered_df.parquet"
|
727 |
+
df.to_parquet(filtered_df_file)
|
728 |
+
|
729 |
+
print(f"Saved extraction metadata for {len(extraction_meta)} rows across {len(urls)} parquet files")
|
730 |
+
|
731 |
+
return url_to_path
|
732 |
+
|
733 |
+
def extract_from_parquet_files(df, local_dir, source_name, delete_parquets=False, verbose=False, tif_columns=None):
|
734 |
+
"""Extracts data from already downloaded parquet files"""
|
735 |
+
if isinstance(local_dir, str):
|
736 |
+
local_dir = Path(local_dir)
|
737 |
+
|
738 |
+
# Path to the directory where parquet files are stored
|
739 |
+
parquet_dir = local_dir / source_name / "parquets"
|
740 |
+
|
741 |
+
# Check if the URL to path mapping exists
|
742 |
+
mapping_file = parquet_dir / "url_to_path.parquet"
|
743 |
+
if not mapping_file.exists():
|
744 |
+
raise FileNotFoundError(f"URL to path mapping file not found at {mapping_file}. Please download parquet files first.")
|
745 |
+
|
746 |
+
# Load the URL to path mapping
|
747 |
+
mapping_df = pd.read_parquet(mapping_file)
|
748 |
+
url_to_path = dict(zip(mapping_df['url'], mapping_df['path']))
|
749 |
+
|
750 |
+
# Try to load the extraction metadata if it exists, otherwise use the provided dataframe
|
751 |
+
extraction_file = parquet_dir / "extraction_metadata.parquet"
|
752 |
+
filtered_df_file = parquet_dir / "filtered_df.parquet"
|
753 |
+
|
754 |
+
if extraction_file.exists() and filtered_df_file.exists():
|
755 |
+
print("Using saved extraction metadata")
|
756 |
+
extraction_df = pd.read_parquet(extraction_file)
|
757 |
+
|
758 |
+
# We need to load the original filtered df to get all the metadata
|
759 |
+
saved_df = pd.read_parquet(filtered_df_file)
|
760 |
+
|
761 |
+
# If a specific subset of df was provided, filter extraction_df to only those URLs
|
762 |
+
if df is not None:
|
763 |
+
urls_to_extract = df.parquet_url.unique()
|
764 |
+
extraction_df = extraction_df[extraction_df['url'].isin(urls_to_extract)]
|
765 |
+
saved_df = saved_df[saved_df.parquet_url.isin(urls_to_extract)]
|
766 |
+
|
767 |
+
# Replace the input df with the saved one
|
768 |
+
df = saved_df
|
769 |
+
else:
|
770 |
+
# If no saved metadata, create extraction_df from the provided df
|
771 |
+
print("No saved extraction metadata found, using provided dataframe")
|
772 |
+
extraction_df = []
|
773 |
+
for url in df.parquet_url.unique():
|
774 |
+
rows = df[df.parquet_url == url].parquet_row.unique()
|
775 |
+
for row in rows:
|
776 |
+
extraction_df.append({
|
777 |
+
'url': url,
|
778 |
+
'row': row
|
779 |
+
})
|
780 |
+
extraction_df = pd.DataFrame(extraction_df)
|
781 |
+
|
782 |
+
# Get all unique URLs that need to be processed
|
783 |
+
urls = extraction_df['url'].unique()
|
784 |
+
print(f'Starting extraction from {len(urls)} parquet files.') if verbose else None
|
785 |
+
|
786 |
+
# Check if all required parquet files exist and are valid
|
787 |
+
missing_or_invalid_urls = []
|
788 |
+
for url in urls:
|
789 |
+
if url not in url_to_path:
|
790 |
+
missing_or_invalid_urls.append((url, "Missing"))
|
791 |
+
elif not is_valid_parquet(url_to_path[url]):
|
792 |
+
missing_or_invalid_urls.append((url, "Invalid/Corrupted"))
|
793 |
+
|
794 |
+
if missing_or_invalid_urls:
|
795 |
+
print(f"Warning: {len(missing_or_invalid_urls)} parquet files are missing or corrupted. Please download them first.")
|
796 |
+
print("Issues with URLs:")
|
797 |
+
for url, issue in missing_or_invalid_urls[:5]: # Show first 5 problem URLs
|
798 |
+
print(f" {url} - {issue}")
|
799 |
+
if len(missing_or_invalid_urls) > 5:
|
800 |
+
print(f" ... and {len(missing_or_invalid_urls) - 5} more")
|
801 |
+
raise FileNotFoundError("Some required parquet files are missing or corrupted. Please run the download step again.")
|
802 |
+
|
803 |
+
def process_parquet(url):
|
804 |
+
# Get the local path of the parquet file
|
805 |
+
parquet_path = url_to_path[url]
|
806 |
+
|
807 |
+
# Get the rows in this parquet file that we need to extract
|
808 |
+
rows = extraction_df[extraction_df['url'] == url]['row'].unique()
|
809 |
+
|
810 |
+
success = False
|
811 |
+
last_error = None
|
812 |
+
|
813 |
+
try:
|
814 |
+
with pq.ParquetFile(parquet_path) as pf:
|
815 |
+
for row_idx in rows:
|
816 |
+
table = pf.read_row_group(row_idx)
|
817 |
+
|
818 |
+
product_id = table['product_id'][0].as_py() if 'product_id' in table.column_names else "id"
|
819 |
+
grid_cell = table['grid_cell'][0].as_py()
|
820 |
+
row = grid_cell.split('_')[0]
|
821 |
+
|
822 |
+
dest = local_dir / Path(f"{source_name}/{row}/{grid_cell}/{product_id}")
|
823 |
+
dest.mkdir(exist_ok=True, parents=True)
|
824 |
+
|
825 |
+
if tif_columns == 'all':
|
826 |
+
columns = [col for col in table.column_names if col[0] == 'B']
|
827 |
+
if source_name in ['Core-S2L1C', 'Core-S2L2A']:
|
828 |
+
columns.append('cloud_mask')
|
829 |
+
elif tif_columns is None:
|
830 |
+
columns = []
|
831 |
+
else:
|
832 |
+
columns = tif_columns
|
833 |
+
|
834 |
+
# Save tifs
|
835 |
+
for col in columns:
|
836 |
+
with open(dest / f"{col}.tif", "wb") as f:
|
837 |
+
f.write(table[col][0].as_py())
|
838 |
+
|
839 |
+
# Save thumbnail
|
840 |
+
with open(dest / "thumbnail.png", "wb") as f:
|
841 |
+
f.write(table['thumbnail'][0].as_py())
|
842 |
+
|
843 |
+
success = True
|
844 |
+
|
845 |
+
# Delete the parquet file if requested
|
846 |
+
if delete_parquets:
|
847 |
+
try:
|
848 |
+
os.remove(parquet_path)
|
849 |
+
except Exception as e:
|
850 |
+
print(f"Warning: Failed to delete parquet file {parquet_path}: {str(e)}")
|
851 |
+
|
852 |
+
except Exception as e:
|
853 |
+
last_error = str(e)
|
854 |
+
|
855 |
+
return {
|
856 |
+
'url': url,
|
857 |
+
'path': parquet_path,
|
858 |
+
'success': success,
|
859 |
+
'error': last_error if not success else None
|
860 |
+
}
|
861 |
+
|
862 |
+
# Use ThreadPoolExecutor for parallel processing
|
863 |
+
max_workers = min(len(urls), MAX_WORKERS)
|
864 |
+
results = []
|
865 |
+
|
866 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
867 |
+
future_to_url = {executor.submit(process_parquet, url): url for url in urls}
|
868 |
+
|
869 |
+
for future in tqdm(
|
870 |
+
futures.as_completed(future_to_url),
|
871 |
+
total=len(urls),
|
872 |
+
desc=f'Extracting from {source_name} parquets'
|
873 |
+
):
|
874 |
+
results.append(future.result())
|
875 |
+
|
876 |
+
# Process results and handle failures
|
877 |
+
failed_extractions = [r for r in results if not r['success']]
|
878 |
+
if failed_extractions:
|
879 |
+
print(f"\nWarning: Failed to extract from {len(failed_extractions)} parquet files for {source_name}")
|
880 |
+
print("\nFailed extractions:")
|
881 |
+
for fail in failed_extractions:
|
882 |
+
print(f"URL: {fail['url']}")
|
883 |
+
print(f"Path: {fail['path']}")
|
884 |
+
print(f"Error: {fail['error']}")
|
885 |
+
print("---")
|
886 |
+
raise RuntimeError(f"Some parquet extractions failed for {source_name}.")
|
887 |
+
|
888 |
+
print(f"Successfully extracted data from {len(results) - len(failed_extractions)} parquet files for {source_name}")
|
889 |
+
|
890 |
+
# Clean up the metadata files if all parquet files were deleted
|
891 |
+
if delete_parquets and not any(os.path.exists(r['path']) for r in results):
|
892 |
+
try:
|
893 |
+
# Delete all metadata files
|
894 |
+
for meta_file in [mapping_file, extraction_file, filtered_df_file]:
|
895 |
+
if meta_file.exists():
|
896 |
+
os.remove(meta_file)
|
897 |
+
|
898 |
+
# Try to remove the parquets directory if it's empty
|
899 |
+
if os.path.exists(parquet_dir) and not os.listdir(parquet_dir):
|
900 |
+
os.rmdir(parquet_dir)
|
901 |
+
|
902 |
+
print(f"Cleaned up metadata files and directory for {source_name}")
|
903 |
+
except Exception as e:
|
904 |
+
print(f"Warning: Failed to clean up metadata files or directory: {str(e)}")
|
905 |
+
|
906 |
+
return results
|
907 |
+
|
908 |
+
def main():
|
909 |
+
args = parse_args()
|
910 |
+
logging.basicConfig(level=logging.INFO)
|
911 |
+
|
912 |
+
data_dir = Path(args.data_dir)
|
913 |
+
subset_dir = data_dir / args.subset_name
|
914 |
+
|
915 |
+
# Always process metadata and filtering for all modes
|
916 |
+
print("\033[92mFetching and filtering metadata...\033[0m")
|
917 |
+
|
918 |
+
# Parallel processing of metadata fetching and filtering
|
919 |
+
max_workers = min(len(args.sources), MAX_WORKERS)
|
920 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
921 |
+
future_to_source = {
|
922 |
+
executor.submit(get_and_filter_source, args, source, data_dir): source
|
923 |
+
for source in args.sources
|
924 |
+
}
|
925 |
+
|
926 |
+
# Collect results while maintaining order
|
927 |
+
filtered_dfs = {}
|
928 |
+
for future in futures.as_completed(future_to_source):
|
929 |
+
source = future_to_source[future]
|
930 |
+
try:
|
931 |
+
filtered_dfs[source] = future.result()
|
932 |
+
except Exception as e:
|
933 |
+
print(f"Error processing {source}: {e}")
|
934 |
+
raise e
|
935 |
+
|
936 |
+
# Synchronization point: find common samples across all sources
|
937 |
+
common_dfs = find_common_samples(filtered_dfs)
|
938 |
+
|
939 |
+
# Remove duplicates for each of the common_dfs
|
940 |
+
common_dfs = remove_duplicates(common_dfs, criteria=args.criteria)
|
941 |
+
|
942 |
+
# After removing duplicates, print the number of samples for each source
|
943 |
+
print("\033[92mAfter removing duplicates:\033[0m")
|
944 |
+
for source, df in common_dfs.items():
|
945 |
+
print(f"{source}: {len(df)} samples for common grid cells")
|
946 |
+
|
947 |
+
if args.preview:
|
948 |
+
return
|
949 |
+
|
950 |
+
if args.n_samples is not None: # Else, we download all samples.
|
951 |
+
print(f"Sampling {args.n_samples} samples per source...")
|
952 |
+
common_dfs = sample_common_dfs(common_dfs, args.n_samples, args.seed)
|
953 |
+
print(f"Done sampling {args.n_samples} grid cells per source!")
|
954 |
+
|
955 |
+
# Remove Core-DEM from common_dfs, because it is already downloaded.
|
956 |
+
# Comment / Uncomment when needed.
|
957 |
+
# common_dfs.pop('Core-DEM')
|
958 |
+
# common_dfs.pop('Core-S1RTC')
|
959 |
+
# common_dfs.pop('Core-S2L1C')
|
960 |
+
# common_dfs.pop('Core-S2L2A')
|
961 |
+
print(f"We will only process the following modalities: {list(common_dfs.keys())}")
|
962 |
+
|
963 |
+
# Print information about download workers
|
964 |
+
if args.mode in ['download', 'full']:
|
965 |
+
print(f"\033[94mUsing {args.download_workers} workers for parallel downloads\033[0m")
|
966 |
+
print("If downloads are slow, try reducing this number with the --download-workers parameter")
|
967 |
+
|
968 |
+
if args.revalidate:
|
969 |
+
print("\033[94mRevalidating all parquet files (will check for corrupted files)\033[0m")
|
970 |
+
else:
|
971 |
+
print("Use --revalidate to force checking of existing parquet files for corruption")
|
972 |
+
|
973 |
+
# Execute the appropriate action based on mode
|
974 |
+
if args.mode == 'download':
|
975 |
+
print("\033[92mStarting download of parquet files...\033[0m")
|
976 |
+
for source, df in common_dfs.items():
|
977 |
+
print(f"\033[94mDownloading parquets for modality: {source}\033[0m")
|
978 |
+
download_source_parallel((source, df), subset_dir, mode='download',
|
979 |
+
delete_parquets=args.delete_parquets,
|
980 |
+
download_workers=args.download_workers,
|
981 |
+
revalidate=args.revalidate)
|
982 |
+
print("\033[92mParquet file download complete.\033[0m")
|
983 |
+
print("To extract data from these parquet files, run this script with --mode extract")
|
984 |
+
|
985 |
+
elif args.mode == 'extract':
|
986 |
+
print("\033[92mStarting extraction from parquet files...\033[0m")
|
987 |
+
for source, df in common_dfs.items():
|
988 |
+
print(f"\033[94mExtracting data for modality: {source}\033[0m")
|
989 |
+
download_source_parallel((source, df), subset_dir, mode='extract',
|
990 |
+
delete_parquets=args.delete_parquets,
|
991 |
+
download_workers=args.download_workers,
|
992 |
+
revalidate=args.revalidate)
|
993 |
+
print("\033[92mData extraction complete.\033[0m")
|
994 |
+
if args.delete_parquets:
|
995 |
+
print("Parquet files have been deleted.")
|
996 |
+
else:
|
997 |
+
print("To delete the parquet files, run this script with --mode extract --delete-parquets")
|
998 |
+
|
999 |
+
else: # mode == 'full'
|
1000 |
+
print("\033[92mStarting full download and extraction process...\033[0m")
|
1001 |
+
for source, df in common_dfs.items():
|
1002 |
+
print(f"\033[94mProcessing modality: {source}\033[0m")
|
1003 |
+
download_source_parallel((source, df), subset_dir, mode='full',
|
1004 |
+
download_workers=args.download_workers,
|
1005 |
+
revalidate=args.revalidate)
|
1006 |
+
print("\033[92mDownload and extraction complete.\033[0m")
|
1007 |
+
|
1008 |
+
if __name__ == "__main__":
|
1009 |
+
main()
|
src/COP-GEN-Beta/prepare_dataset_images.py
ADDED
@@ -0,0 +1,488 @@
1 |
+
from libs.autoencoder import get_model
|
2 |
+
import torch.nn as nn
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
from torch.utils.data import DataLoader
|
6 |
+
import torchvision
|
7 |
+
import torchvision.transforms as transforms
|
8 |
+
from tqdm import tqdm
|
9 |
+
import os
|
10 |
+
import argparse
|
11 |
+
from pathlib import Path
|
12 |
+
import glob
|
13 |
+
from majortom.NMajorTOM import NMajorTOM
|
14 |
+
import pyarrow.parquet as pq
|
15 |
+
import geopandas as gpd
|
16 |
+
import pandas as pd
|
17 |
+
from majortom.coverage_vis import get_coveragemap
|
18 |
+
|
19 |
+
torch.manual_seed(0)
|
20 |
+
np.random.seed(0)
|
21 |
+
|
22 |
+
PATCH_SIZE = 256
|
23 |
+
GRID_SIZE = 4 # 4x4 grid of patches
|
24 |
+
|
25 |
+
SATELLITE_CONFIGS = {
|
26 |
+
'S2L2A': {
|
27 |
+
'tif_bands': ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12', 'cloud_mask'],
|
28 |
+
'png_bands': ['thumbnail'],
|
29 |
+
'tif_transforms': [],
|
30 |
+
'png_transforms': [
|
31 |
+
transforms.CenterCrop(PATCH_SIZE * GRID_SIZE), # Crop to 1024x1024
|
32 |
+
transforms.ToTensor(),
|
33 |
+
transforms.Normalize(mean=(0.5,), std=(0.5,))
|
34 |
+
]
|
35 |
+
},
|
36 |
+
'S2L1C': {
|
37 |
+
'tif_bands': ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12', 'cloud_mask'],
|
38 |
+
'png_bands': ['thumbnail'],
|
39 |
+
'tif_transforms': [],
|
40 |
+
'png_transforms': [
|
41 |
+
transforms.CenterCrop(PATCH_SIZE * GRID_SIZE),
|
42 |
+
transforms.ToTensor(),
|
43 |
+
transforms.Normalize(mean=(0.5,), std=(0.5,))
|
44 |
+
]
|
45 |
+
},
|
46 |
+
'S1RTC': {
|
47 |
+
'tif_bands': ['vv', 'vh'],
|
48 |
+
'png_bands': ['thumbnail'],
|
49 |
+
'tif_transforms': [],
|
50 |
+
'png_transforms': [
|
51 |
+
transforms.CenterCrop(PATCH_SIZE * GRID_SIZE),
|
52 |
+
transforms.ToTensor(),
|
53 |
+
transforms.Normalize(mean=(0.5,), std=(0.5,))
|
54 |
+
]
|
55 |
+
},
|
56 |
+
'DEM': {
|
57 |
+
'tif_bands': ['DEM', 'compressed'],
|
58 |
+
'png_bands': ['thumbnail'],
|
59 |
+
'tif_transforms': [],
|
60 |
+
'png_transforms': [
|
61 |
+
transforms.Resize(1068), # First, interpolate to match the resolution of the other modalities (1068x1068)
|
62 |
+
transforms.CenterCrop(PATCH_SIZE * GRID_SIZE),
|
63 |
+
transforms.ToTensor(),
|
64 |
+
transforms.Normalize(mean=(0.5,), std=(0.5,))
|
65 |
+
]
|
66 |
+
}
|
67 |
+
}
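# Note: only the 'thumbnail' PNGs are patched further down; the tif band lists above are used to verify that a sample is complete on disk.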
|
68 |
+
|
69 |
+
def fix_crs(df):
|
70 |
+
if df['crs'].iloc[0].startswith('EPSG:EPSG:'):
|
71 |
+
df['crs'] = df['crs'].str.replace('EPSG:EPSG:', 'EPSG:', regex=False)
|
72 |
+
return df
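# e.g. a malformed metadata value such as 'EPSG:EPSG:4326' (illustrative) is normalised to 'EPSG:4326'.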
|
73 |
+
|
74 |
+
def load_metadata(path):
|
75 |
+
df = pq.read_table(path).to_pandas()
|
76 |
+
if 'timestamp' in df.columns:
|
77 |
+
df['timestamp'] = pd.to_datetime(df.timestamp)
|
78 |
+
df = fix_crs(df)
|
79 |
+
gdf = gpd.GeoDataFrame(
|
80 |
+
df, geometry=gpd.points_from_xy(df.centre_lon, df.centre_lat), crs=df.crs.iloc[0]
|
81 |
+
)
|
82 |
+
return gdf
|
83 |
+
|
84 |
+
def process_satellite(subset_path, satellite_types, bands_per_type, ratio_train_test, seed):
|
85 |
+
"""Process multiple satellite types simultaneously while ensuring they're paired"""
|
86 |
+
modalities = {}
|
87 |
+
filtered_dfs = {}
|
88 |
+
|
89 |
+
# First, load metadata for all satellite types
|
90 |
+
for sat_type in satellite_types:
|
91 |
+
metadata_path = os.path.join(subset_path, f"Core-{sat_type}", "metadata.parquet")
|
92 |
+
if not os.path.exists(metadata_path):
|
93 |
+
print(f"Skipping {sat_type}: metadata not found at {metadata_path}")
|
94 |
+
continue
|
95 |
+
|
96 |
+
gdf = load_metadata(metadata_path)
|
97 |
+
local_dir = os.path.join(subset_path, f"Core-{sat_type}")
|
98 |
+
|
99 |
+
# Split bands into tif and png based on configuration
|
100 |
+
tif_bands = [b for b in bands_per_type[sat_type] if b in SATELLITE_CONFIGS[sat_type]['tif_bands']]
|
101 |
+
png_bands = [b for b in bands_per_type[sat_type] if b in SATELLITE_CONFIGS[sat_type]['png_bands']]
|
102 |
+
|
103 |
+
print(f"\nChecking files for {sat_type}...")
|
104 |
+
|
105 |
+
# Check which indices have all required files
|
106 |
+
valid_indices = []
|
107 |
+
|
108 |
+
for idx in tqdm(range(len(gdf)), desc=f"Validating {sat_type} samples", unit="samples"):
|
109 |
+
row = gdf.iloc[idx]
|
110 |
+
grid_cell = row.grid_cell
|
111 |
+
row_id = grid_cell.split('_')[0]
|
112 |
+
product_id = row.product_id if 'product_id' in row.index else "id"
|
113 |
+
|
114 |
+
base_path = os.path.join(local_dir, row_id, grid_cell, product_id)
|
115 |
+
all_files_exist = True
|
116 |
+
|
117 |
+
# Check TIF files
|
118 |
+
for band in tif_bands:
|
119 |
+
if not os.path.exists(os.path.join(base_path, f"{band}.tif")):
|
120 |
+
all_files_exist = False
|
121 |
+
break
|
122 |
+
|
123 |
+
# Check PNG files
|
124 |
+
if all_files_exist: # Only check PNGs if TIFs exist
|
125 |
+
for band in png_bands:
|
126 |
+
if not os.path.exists(os.path.join(base_path, f"{band}.png")):
|
127 |
+
all_files_exist = False
|
128 |
+
break
|
129 |
+
|
130 |
+
if all_files_exist:
|
131 |
+
valid_indices.append(idx)
|
132 |
+
|
133 |
+
filtered_df = gdf.iloc[valid_indices].copy()
|
134 |
+
print(f"Found {len(filtered_df)} valid samples out of {len(gdf)} for {sat_type}")
|
135 |
+
filtered_dfs[sat_type] = filtered_df
|
136 |
+
|
137 |
+
# Find common grid cells across all modalities
|
138 |
+
grid_cell_sets = {
|
139 |
+
source: set(df['grid_cell'].unique())
|
140 |
+
for source, df in filtered_dfs.items()
|
141 |
+
}
|
142 |
+
|
143 |
+
# Find intersection of all grid cell sets
|
144 |
+
common_grid_cells = set.intersection(*grid_cell_sets.values())
|
145 |
+
print(f"\nFound {len(common_grid_cells)} common grid cells across all modalities")
|
146 |
+
|
147 |
+
# Filter all modalities to keep only common grid cells
|
148 |
+
for sat_type in satellite_types:
|
149 |
+
if sat_type not in filtered_dfs:
|
150 |
+
continue
|
151 |
+
|
152 |
+
df = filtered_dfs[sat_type]
|
153 |
+
df = df[df['grid_cell'].isin(common_grid_cells)]
|
154 |
+
print(f"{sat_type}: {len(df)} samples for common grid cells")
|
155 |
+
|
156 |
+
modalities[sat_type] = {
|
157 |
+
'df': df,
|
158 |
+
'local_dir': os.path.join(subset_path, f"Core-{sat_type}"),
|
159 |
+
'tif_bands': tif_bands,
|
160 |
+
'png_bands': png_bands,
|
161 |
+
'tif_transforms': SATELLITE_CONFIGS[sat_type]['tif_transforms'],
|
162 |
+
'png_transforms': SATELLITE_CONFIGS[sat_type]['png_transforms']
|
163 |
+
}
|
164 |
+
|
165 |
+
dataset = NMajorTOM(modalities=modalities, ratio_train_test=ratio_train_test, seed=seed)
|
166 |
+
|
167 |
+
return dataset, len(common_grid_cells)
|
168 |
+
|
169 |
+
def is_valid_image(filepath):
|
170 |
+
"""Check if an image file is valid and can be opened. Deletes the file if corrupted."""
|
171 |
+
try:
|
172 |
+
from PIL import Image
|
173 |
+
with Image.open(filepath) as img:
|
174 |
+
img.verify() # Verify it's actually an image
|
175 |
+
return True
|
176 |
+
except Exception:
|
177 |
+
print(f" Warning: Corrupted or invalid image found: {filepath}")
|
178 |
+
try:
|
179 |
+
os.remove(filepath)
|
180 |
+
print(f" Deleted corrupted file: {filepath}")
|
181 |
+
except Exception as e:
|
182 |
+
print(f" Failed to delete corrupted file {filepath}: {e}")
|
183 |
+
return False
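# Deleting the corrupted file lets the affected grid cell be detected as incomplete and regenerated on a later run.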
|
184 |
+
|
185 |
+
def get_existing_complete_grid_cells(output_dir, satellite_types, bands_per_type, num_grid_cells, expected_patches=16):
|
186 |
+
"""Returns a set of grid_cells that already have all their patches for all modalities"""
|
187 |
+
complete_grid_cells_by_sat = {}
|
188 |
+
corrupted_grid_cells = set() # Track grid cells with corrupted files
|
189 |
+
|
190 |
+
for sat_type in satellite_types:
|
191 |
+
sat_base_dir = f"{sat_type}_{'_'.join(bands_per_type[sat_type])}"
|
192 |
+
complete_grid_cells_by_sat[sat_type] = set()
|
193 |
+
|
194 |
+
# Check both train and test directories
|
195 |
+
for split in ['train', 'test']:
|
196 |
+
dir_path = os.path.join(output_dir, split, sat_base_dir)
|
197 |
+
print(f" Checking {dir_path} for existing complete grid cells")
|
198 |
+
if not os.path.exists(dir_path):
|
199 |
+
print(f" Warning: Directory {dir_path} does not exist")
|
200 |
+
continue
|
201 |
+
|
202 |
+
# Get all PNG files and extract their grid cells
|
203 |
+
png_files = glob.glob(os.path.join(dir_path, "*.png"))
|
204 |
+
print(f" Found {len(png_files)} PNG files in {dir_path}")
|
205 |
+
current_grid_cells = {}
|
206 |
+
|
207 |
+
for f in png_files:
|
208 |
+
# This will now delete the file if it's corrupted
|
209 |
+
if not is_valid_image(f):
|
210 |
+
# Get the grid cell from the corrupted file
|
211 |
+
base_name = os.path.basename(f)
|
212 |
+
corrupted_grid_cell = "_".join(base_name.split("_")[:-1])
|
213 |
+
# Add to set of corrupted grid cells
|
214 |
+
corrupted_grid_cells.add(corrupted_grid_cell)
|
215 |
+
# Remove this grid cell from our complete cells since we'll need to regenerate it
|
216 |
+
if corrupted_grid_cell in current_grid_cells:
|
217 |
+
del current_grid_cells[corrupted_grid_cell]
|
218 |
+
continue
|
219 |
+
|
220 |
+
base_name = os.path.basename(f)
|
221 |
+
grid_cell = "_".join(base_name.split("_")[:-1]) # Remove patch number
|
222 |
+
current_grid_cells[grid_cell] = current_grid_cells.get(grid_cell, 0) + 1
|
223 |
+
|
224 |
+
# Keep only grid cells with exactly the expected number of patches
|
225 |
+
complete_cells = {gc for gc, count in current_grid_cells.items() if count == expected_patches}
|
226 |
+
print(f" Found {len(complete_cells)} complete grid cells in {split} split for {sat_type}")
|
227 |
+
complete_grid_cells_by_sat[sat_type].update(complete_cells)
|
228 |
+
|
229 |
+
print(f"Total complete grid cells for {sat_type}: {len(complete_grid_cells_by_sat[sat_type])}")
|
230 |
+
|
231 |
+
# Find grid cells that are complete across all satellite types
|
232 |
+
if not complete_grid_cells_by_sat:
|
233 |
+
return set()
|
234 |
+
|
235 |
+
complete_grid_cells = set.intersection(*complete_grid_cells_by_sat.values())
|
236 |
+
|
237 |
+
# Remove any grid cells that had corrupted files
|
238 |
+
complete_grid_cells = complete_grid_cells - corrupted_grid_cells
|
239 |
+
|
240 |
+
# Print detailed debugging information
|
241 |
+
print("\nComplete grid cells by satellite type:")
|
242 |
+
for sat_type, cells in complete_grid_cells_by_sat.items():
|
243 |
+
print(f"{sat_type}: {len(cells)} grid cells")
|
244 |
+
print(f"\nGrid cells complete across all types: {len(complete_grid_cells)}")
|
245 |
+
if corrupted_grid_cells:
|
246 |
+
print(f"Removed {len(corrupted_grid_cells)} grid cells due to corrupted files")
|
247 |
+
|
248 |
+
if len(complete_grid_cells) < num_grid_cells:
|
249 |
+
# Find which grid cells are missing from which satellite types
|
250 |
+
all_grid_cells = set.union(*complete_grid_cells_by_sat.values())
|
251 |
+
print("\nAnalyzing missing grid cells:")
|
252 |
+
for grid_cell in all_grid_cells:
|
253 |
+
missing_from = [sat_type for sat_type in satellite_types
|
254 |
+
if grid_cell not in complete_grid_cells_by_sat[sat_type]]
|
255 |
+
if missing_from:
|
256 |
+
print(f"Grid cell {grid_cell} is missing from: {', '.join(missing_from)}")
|
257 |
+
|
258 |
+
return complete_grid_cells
|
259 |
+
|
260 |
+
def crop_images(dataset, satellite_types, bands_per_type, output_dir, num_grid_cells, flip=False, center_crop=False):
|
261 |
+
"""Extract features for all modalities simultaneously while ensuring they're paired"""
|
262 |
+
from concurrent.futures import ThreadPoolExecutor
|
263 |
+
import itertools
|
264 |
+
|
265 |
+
# Create output directories if saving PNGs
|
266 |
+
for sat_type in satellite_types:
|
267 |
+
sat_base_dir = f"{sat_type}_{'_'.join(bands_per_type[sat_type])}"
|
268 |
+
os.makedirs(os.path.join(output_dir, 'train', sat_base_dir), exist_ok=True)
|
269 |
+
os.makedirs(os.path.join(output_dir, 'test', sat_base_dir), exist_ok=True)
|
270 |
+
|
271 |
+
# Get already processed grid cells
|
272 |
+
print("Checking for existing complete grid cells...")
|
273 |
+
# Adjust the expected patch count based on center_crop mode
|
274 |
+
expected_patches = 1 if center_crop else GRID_SIZE * GRID_SIZE
|
275 |
+
# Resume support is currently disabled; uncomment the call below to skip grid cells
# that already have all of their patches on disk.
# complete_grid_cells = get_existing_complete_grid_cells(output_dir, satellite_types, bands_per_type, num_grid_cells, expected_patches)
complete_grid_cells = set()
|
277 |
+
print(f"Found {len(complete_grid_cells)} already processed grid cells")
|
278 |
+
|
279 |
+
# Pre-calculate patch positions (only used if not center_crop)
|
280 |
+
patch_positions = list(itertools.product(range(GRID_SIZE), range(GRID_SIZE)))
|
281 |
+
|
282 |
+
def process_sample(sample):
|
283 |
+
"""Process a single sample (large image) and return metadata for all its patches"""
|
284 |
+
# Check if this grid cell is already processed
|
285 |
+
grid_cell = sample[satellite_types[0]]['grid_cell']
|
286 |
+
if grid_cell in complete_grid_cells:
|
287 |
+
print(f"Skipping {grid_cell} because it already has all its patches")
|
288 |
+
return []
|
289 |
+
|
290 |
+
sample_metadata = []
|
291 |
+
|
292 |
+
for sat_type in satellite_types:
|
293 |
+
modality_data = sample[sat_type]
|
294 |
+
split = modality_data['split']
|
295 |
+
grid_cell = modality_data['grid_cell']
|
296 |
+
|
297 |
+
img = modality_data['thumbnail']
|
298 |
+
|
299 |
+
if center_crop:
|
300 |
+
# Calculate center crop coordinates
|
301 |
+
h, w = img.shape[-2:]
|
302 |
+
start_h = (h - PATCH_SIZE) // 2
|
303 |
+
start_w = (w - PATCH_SIZE) // 2
|
304 |
+
patch = img[:, start_h:start_h + PATCH_SIZE, start_w:start_w + PATCH_SIZE]
|
305 |
+
patches = patch.unsqueeze(0) # Add batch dimension
|
306 |
+
else:
|
307 |
+
# Original patchifying logic
|
308 |
+
C = img.size(0)
|
309 |
+
patches = img.unfold(1, PATCH_SIZE, PATCH_SIZE).unfold(2, PATCH_SIZE, PATCH_SIZE)
|
310 |
+
patches = patches.permute(0, 1, 2, 3, 4).reshape(C, -1, PATCH_SIZE, PATCH_SIZE)
|
311 |
+
patches = patches.permute(1, 0, 2, 3) # [N_patches, C, H, W]
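# With the 1024x1024 centre crop above this produces GRID_SIZE * GRID_SIZE = 16 non-overlapping 256x256 patches.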
|
312 |
+
|
313 |
+
if sat_type == 'DEM':
|
314 |
+
patches = patches.repeat(1, 3, 1, 1)
|
315 |
+
|
316 |
+
# Compute paths once
|
317 |
+
sat_base_dir = f"{sat_type}_thumbnail"
|
318 |
+
save_dir = os.path.join(output_dir, split, sat_base_dir)
|
319 |
+
|
320 |
+
# Batch denormalize
|
321 |
+
patches_denorm = (patches.detach().cpu() + 1) / 2
|
322 |
+
|
323 |
+
# Save images
|
324 |
+
for patch_idx, patch in enumerate(patches_denorm):
|
325 |
+
if center_crop:
|
326 |
+
filename = f"{grid_cell}_center.png"
|
327 |
+
metadata = {
|
328 |
+
'grid_cell': grid_cell,
|
329 |
+
'satellite': sat_type,
|
330 |
+
'bands': 'thumbnail',
|
331 |
+
'split': split,
|
332 |
+
'patch_num': 0,
|
333 |
+
'patch_row': (GRID_SIZE - 1) // 2,
|
334 |
+
'patch_col': (GRID_SIZE - 1) // 2
|
335 |
+
}
|
336 |
+
else:
|
337 |
+
filename = f"{grid_cell}_{patch_idx}.png"
|
338 |
+
metadata = {
|
339 |
+
'grid_cell': grid_cell,
|
340 |
+
'satellite': sat_type,
|
341 |
+
'bands': 'thumbnail',
|
342 |
+
'split': split,
|
343 |
+
'patch_num': patch_idx,
|
344 |
+
'patch_row': patch_positions[patch_idx][0],
|
345 |
+
'patch_col': patch_positions[patch_idx][1]
|
346 |
+
}
|
347 |
+
|
348 |
+
torchvision.utils.save_image(patch, os.path.join(save_dir, filename))
|
349 |
+
sample_metadata.append(metadata)
|
350 |
+
|
351 |
+
return sample_metadata
|
352 |
+
|
353 |
+
# Process samples in parallel
|
354 |
+
all_metadata = []
|
355 |
+
total_samples = len(dataset)
|
356 |
+
|
357 |
+
print(f"Processing {total_samples} samples...")
|
358 |
+
|
359 |
+
with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
|
360 |
+
# Create a list to store futures
|
361 |
+
futures = []
|
362 |
+
|
363 |
+
# Submit tasks with progress bar around the dataset iteration
|
364 |
+
for sample in tqdm(dataset, total=total_samples,
|
365 |
+
desc="Processing samples",
|
366 |
+
unit="sample",
|
367 |
+
dynamic_ncols=True):
|
368 |
+
future = executor.submit(process_sample, sample)
|
369 |
+
futures.append(future)
|
370 |
+
|
371 |
+
# Collect results
|
372 |
+
for future in futures:
|
373 |
+
metadata = future.result()
|
374 |
+
if metadata: # Only add metadata for newly processed samples
|
375 |
+
all_metadata.extend(metadata)
|
376 |
+
|
377 |
+
# Convert to DataFrame and split by train/test
|
378 |
+
if all_metadata: # Only process if we have new metadata
|
379 |
+
df = pd.DataFrame(all_metadata)
|
380 |
+
train_df = df[df['split'] == 'train'].drop('split', axis=1)
|
381 |
+
test_df = df[df['split'] == 'test'].drop('split', axis=1)
|
382 |
+
|
383 |
+
# Load existing metadata if it exists and append new data
|
384 |
+
train_path = os.path.join(output_dir, 'train_metadata.parquet')
|
385 |
+
test_path = os.path.join(output_dir, 'test_metadata.parquet')
|
386 |
+
|
387 |
+
if os.path.exists(train_path):
|
388 |
+
existing_train = pd.read_parquet(train_path)
|
389 |
+
train_df = pd.concat([existing_train, train_df], ignore_index=True)
|
390 |
+
# Deduplicate based on all columns
|
391 |
+
train_df = train_df.drop_duplicates(subset=['grid_cell', 'satellite', 'patch_num'])
|
392 |
+
|
393 |
+
if os.path.exists(test_path):
|
394 |
+
existing_test = pd.read_parquet(test_path)
|
395 |
+
test_df = pd.concat([existing_test, test_df], ignore_index=True)
|
396 |
+
# Deduplicate based on all columns
|
397 |
+
test_df = test_df.drop_duplicates(subset=['grid_cell', 'satellite', 'patch_num'])
|
398 |
+
|
399 |
+
# Save metadata
|
400 |
+
train_df.to_parquet(train_path)
|
401 |
+
test_df.to_parquet(test_path)
|
402 |
+
|
403 |
+
print(f"Processed {len(all_metadata) // (16 * len(satellite_types))} new grid cells")
|
404 |
+
print(f"Total metadata: {len(train_df)} training and {len(test_df)} testing samples")
|
405 |
+
else:
|
406 |
+
print("No new grid cells to process")
|
407 |
+
|
408 |
+
def visualize_patches(dataset, satellite_types, bands_per_type, output_dir):
|
409 |
+
"""Visualize the coverage of patches in a world map"""
|
410 |
+
# Take the first satellite type since they're all paired
|
411 |
+
sat_type = satellite_types[0]
|
412 |
+
modality = dataset.modalities[sat_type]
|
413 |
+
df = modality['df']
|
414 |
+
|
415 |
+
# Let's split into train and test.
|
416 |
+
# First, add the split column to the dataframe based on the grid_cell_to_split dictionary
|
417 |
+
df['split'] = df['grid_cell'].map(dataset.grid_cell_to_split)
|
418 |
+
|
419 |
+
# Create coverage map
|
420 |
+
coverage_img_all = get_coveragemap(df)
|
421 |
+
coverage_img_train = get_coveragemap(df[df['split'] == 'train'])
|
422 |
+
coverage_img_test = get_coveragemap(df[df['split'] == 'test'])
|
423 |
+
coverage_img_train_test = get_coveragemap(df[df['split'] == 'train'], df[df['split'] == 'test'])
|
424 |
+
|
425 |
+
# Save the coverage map
|
426 |
+
coverage_path_all = os.path.join(output_dir, 'coverage_map_all.png')
|
427 |
+
coverage_path_train = os.path.join(output_dir, 'coverage_map_train.png')
|
428 |
+
coverage_path_test = os.path.join(output_dir, 'coverage_map_test.png')
|
429 |
+
coverage_path_train_test = os.path.join(output_dir, 'coverage_map_train_test.png')
|
430 |
+
coverage_img_all.save(coverage_path_all, format='PNG')
|
431 |
+
coverage_img_train.save(coverage_path_train, format='PNG')
|
432 |
+
coverage_img_test.save(coverage_path_test, format='PNG')
|
433 |
+
coverage_img_train_test.save(coverage_path_train_test, format='PNG')
|
434 |
+
print(f"Saved coverage maps to {coverage_path_all}, {coverage_path_train}, {coverage_path_test} and {coverage_path_train_test}")
|
435 |
+
|
436 |
+
|
437 |
+
def main():
|
438 |
+
parser = argparse.ArgumentParser(description='Extract features from MajorTOM dataset')
|
439 |
+
parser.add_argument('--subset_path', required=True, help='Path to the subset folder')
|
440 |
+
parser.add_argument('--output_dir', required=True, help='Path to the output directory')
|
441 |
+
parser.add_argument('--bands', nargs='+', required=True, help='Bands to process (e.g., B1 B2 B3 DEM vv vh)')
|
442 |
+
parser.add_argument('--ratio_train_test', type=float, default=0.95, help='Ratio of training to testing data')
|
443 |
+
parser.add_argument('--flip', action='store_true', help='Flip the patches')
|
444 |
+
parser.add_argument('--visualize', action='store_true', help='Visualize the patches in a world map')
|
445 |
+
parser.add_argument('--seed', type=int, default=42, help='Random seed')
|
446 |
+
parser.add_argument('--center_crop', action='store_true', help='Use center crop instead of patchifying')
|
447 |
+
args = parser.parse_args()
|
448 |
+
|
449 |
+
# Get subset name from path
|
450 |
+
subset_name = Path(args.subset_path).name
|
451 |
+
|
452 |
+
print("Flip is set to", args.flip)
|
453 |
+
print("Seed is set to", args.seed)
|
454 |
+
print("Subset path is", args.subset_path)
|
455 |
+
print("Bands are", args.bands)
|
456 |
+
print("Ratio train test is", args.ratio_train_test)
|
457 |
+
print("Visualize is set to", args.visualize)
|
458 |
+
print("Center crop is set to", args.center_crop)
|
459 |
+
# Create the main output directory
|
460 |
+
all_bands = '_'.join(sorted(args.bands))
|
461 |
+
os.makedirs(args.output_dir, exist_ok=True)
|
462 |
+
|
463 |
+
# Group bands by satellite type
|
464 |
+
bands_per_type = {}
|
465 |
+
satellite_types = []
|
466 |
+
for sat_type, config in SATELLITE_CONFIGS.items():
|
467 |
+
all_sat_bands = config['tif_bands'] + config['png_bands']
|
468 |
+
sat_bands = [b for b in args.bands if b in all_sat_bands]
|
469 |
+
if sat_bands:
|
470 |
+
bands_per_type[sat_type] = sat_bands
|
471 |
+
satellite_types.append(sat_type)
|
472 |
+
|
473 |
+
if satellite_types:
|
474 |
+
# Process all satellite types together
|
475 |
+
dataset, num_grid_cells = process_satellite(args.subset_path, satellite_types, bands_per_type, args.ratio_train_test, args.seed)
|
476 |
+
|
477 |
+
if args.visualize:
|
478 |
+
print("==> Visualizing patches...")
|
479 |
+
visualize_patches(dataset, satellite_types, bands_per_type, args.output_dir)
|
480 |
+
print("==> Done visualizing patches! Exiting...")
|
481 |
+
# exit()
|
482 |
+
|
483 |
+
print("==> Cropping images...")
|
484 |
+
crop_images(dataset, satellite_types, bands_per_type, args.output_dir, num_grid_cells,
|
485 |
+
flip=args.flip, center_crop=args.center_crop)
|
486 |
+
|
487 |
+
if __name__ == "__main__":
|
488 |
+
main()
|
src/COP-GEN-Beta/sample_n_triffuser.py
ADDED
@@ -0,0 +1,652 @@
1 |
+
import ml_collections
|
2 |
+
import torch
|
3 |
+
import random
|
4 |
+
import utils
|
5 |
+
from dpm_solver_pp import NoiseScheduleVP, DPM_Solver
|
6 |
+
from absl import logging
|
7 |
+
import einops
|
8 |
+
import libs.autoencoder
|
9 |
+
from torchvision.utils import save_image, make_grid
|
10 |
+
import torchvision.transforms as standard_transforms
|
11 |
+
import numpy as np
|
12 |
+
from PIL import Image
|
13 |
+
import time
|
14 |
+
import copy
|
15 |
+
from datasets import get_dataset
|
16 |
+
from torch.utils.data import Dataset, DataLoader
|
17 |
+
from tqdm.auto import tqdm
|
18 |
+
from torch.utils._pytree import tree_map
|
19 |
+
import glob
|
20 |
+
import os
|
21 |
+
import functools
|
22 |
+
from concurrent.futures import ThreadPoolExecutor
|
23 |
+
|
24 |
+
# Add profiling tools
|
25 |
+
class Profiler:
|
26 |
+
def __init__(self):
|
27 |
+
self.times = {}
|
28 |
+
|
29 |
+
def profile(self, func):
|
30 |
+
@functools.wraps(func)
|
31 |
+
def wrapper(*args, **kwargs):
|
32 |
+
start_time = time.time()
|
33 |
+
result = func(*args, **kwargs)
|
34 |
+
end_time = time.time()
|
35 |
+
|
36 |
+
func_name = func.__name__
|
37 |
+
if func_name not in self.times:
|
38 |
+
self.times[func_name] = []
|
39 |
+
self.times[func_name].append(end_time - start_time)
|
40 |
+
|
41 |
+
return result
|
42 |
+
return wrapper
|
43 |
+
|
44 |
+
def summary(self):
|
45 |
+
print("\n----- Profiling Summary -----")
|
46 |
+
for func_name, times in self.times.items():
|
47 |
+
avg_time = sum(times) / len(times)
|
48 |
+
total_time = sum(times)
|
49 |
+
calls = len(times)
|
50 |
+
print(f"{func_name}: {total_time:.2f}s total, {avg_time:.4f}s avg, {calls} calls")
|
51 |
+
print("----------------------------\n")
|
52 |
+
|
53 |
+
profiler = Profiler()
|
54 |
+
|
55 |
+
MODALITIES = {
|
56 |
+
4: ['dem', 's1_rtc', 's2_l1c', 's2_l2a'],
|
57 |
+
3: ['dem', 's1_rtc', 's2_l1c'],
|
58 |
+
2: ['s1_rtc', 's2_l2a'],
|
59 |
+
}
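# Keys are the number of modalities a checkpoint was trained on; --n_mod selects the matching ordered modality list.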
|
60 |
+
|
61 |
+
MODEL_RESOLUTION = 256
|
62 |
+
|
63 |
+
"""
|
64 |
+
Sampling for any n modalities
|
65 |
+
|
66 |
+
> python3 sample_n_triffuser.py --config=path --data_path=path --nnet_path=path \
|
67 |
+
--n_mod=int --n_samples=int \
|
68 |
+
--generate=[modalities] --condition=[modalities]
|
69 |
+
|
70 |
+
|
71 |
+
Generate all modalities unconditional (joint):
|
72 |
+
python3 sample_n_triffuser.py --n_mod=4 --generate=s2_l1c,s2_l2a,s1_rtc,dem
|
73 |
+
|
74 |
+
Generate a pair unconditional (joint):
|
75 |
+
python3 sample_n_triffuser.py --n_mod=4 --generate=s1_rtc,s2_l2a
|
76 |
+
|
77 |
+
Generate s1_rtc and s2_l2a, conditioned on dem and s2_l1c (conditional):
|
78 |
+
python3 sample_n_triffuser.py --n_mod=4 --generate=s1_rtc,s2_l2a --condition=dem,s2_l1c
|
79 |
+
|
80 |
+
Generate dem conditioned on s1_rtc, s2_l1c, s2_l2a (conditional):
|
81 |
+
python3 sample_n_triffuser.py --n_mod=4 --generate=dem --condition=s1_rtc,s2_l1c,s2_l2a
|
82 |
+
|
83 |
+
Generate dem conditioned on s1_rtc (conditional) (the rest are automatically ignored: s2_l1c, s2_l2a):
|
84 |
+
python3 sample_n_triffuser.py --n_mod=4 --generate=dem --condition=s1_rtc
|
85 |
+
|
86 |
+
Generate dem unconditional (marginal) (no condition, the rest are ignored):
|
87 |
+
python3 sample_n_triffuser.py --n_mod=4 --generate=dem
|
88 |
+
|
89 |
+
|
90 |
+
Note:
|
91 |
+
--generate flag is mandatory
|
92 |
+
"generate" modalities and "condition" modalities should always be different
|
93 |
+
|
94 |
+
|
95 |
+
"""
|
96 |
+
|
97 |
+
class CustomImageDataset(Dataset):
|
98 |
+
def __init__(self, folder_path, transform=None):
|
99 |
+
self.folder_path = folder_path
|
100 |
+
self.transform = transform
|
101 |
+
self.image_files = glob.glob(os.path.join(folder_path, "*.png"))
|
102 |
+
print("There are", len(self.image_files), "images in the dataset")
|
103 |
+
|
104 |
+
def __len__(self):
|
105 |
+
return len(self.image_files)
|
106 |
+
|
107 |
+
def __getitem__(self, idx):
|
108 |
+
image = Image.open(self.image_files[idx]).convert("RGB")
|
109 |
+
if self.transform:
|
110 |
+
image = self.transform(image)
|
111 |
+
# Return both the image and the filename
|
112 |
+
return image, os.path.basename(self.image_files[idx])
|
113 |
+
|
114 |
+
|
115 |
+
|
116 |
+
def stable_diffusion_beta_schedule(linear_start=0.00085, linear_end=0.0120, n_timestep=1000):
|
117 |
+
_betas = (
|
118 |
+
torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
|
119 |
+
)
|
120 |
+
return _betas.numpy()
|
121 |
+
|
122 |
+
@profiler.profile
|
123 |
+
def prepare_contexts(config, images, filenames, device, autoencoder=None):
|
124 |
+
"""
|
125 |
+
If a modality is conditional, we need to return the npy feature encodings
|
126 |
+
If a modality is unconditional, we need to return random noise
|
127 |
+
|
128 |
+
batch_shape = (n_modalities, B, C, H, W)
|
129 |
+
|
130 |
+
Returns:
|
131 |
+
img_contexts: Tensor containing contexts for each modality
|
132 |
+
processed_filenames: List of filenames, duplicated and labeled with version suffixes if n_samples > 1
|
133 |
+
"""
|
134 |
+
|
135 |
+
# Create a noise tensor with the same shape as the images batch
|
136 |
+
if config.data_type == 'lmdb':
|
137 |
+
effective_batch_size = images[0].shape[0] * config.n_samples if config.n_samples > 1 else images[0].shape[0]
|
138 |
+
img_contexts = torch.randn(config.num_modalities, effective_batch_size, *images[0].shape[1:], device=device)
|
139 |
+
elif config.data_type == 'folder-img':
|
140 |
+
# Calculate effective batch size (original batch size * n_samples)
|
141 |
+
effective_batch_size = images.shape[0] * config.n_samples if config.n_samples > 1 else images.shape[0]
|
142 |
+
# Multiply the images_batch shape by 2 because we have both mean and variance
|
143 |
+
# as output from the autoencoder
|
144 |
+
img_contexts = torch.randn(config.num_modalities, effective_batch_size, 2 * config.z_shape[0],
|
145 |
+
config.z_shape[1], config.z_shape[2], device=device)
|
146 |
+
|
147 |
+
# Process filenames - duplicate them if n_samples > 1 and add version suffixes
|
148 |
+
processed_filenames = []
|
149 |
+
if config.n_samples > 1:
|
150 |
+
for filename in filenames:
|
151 |
+
for i in range(config.n_samples):
|
152 |
+
processed_filenames.append(f"{filename}_v{i+1}")
|
153 |
+
else:
|
154 |
+
processed_filenames = filenames
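# e.g. with n_samples=3 a conditioning image 'tile_0001' (illustrative name) yields outputs
# 'tile_0001_v1', 'tile_0001_v2', 'tile_0001_v3'.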
|
155 |
+
|
156 |
+
# For each modality in the images_batch, if it is conditional, load and duplicate the npy feature encodings
|
157 |
+
for i, modality in enumerate(config.modalities):
|
158 |
+
if config.condition_modalities_mask[i]:
|
159 |
+
if config.data_type == 'lmdb':
|
160 |
+
# Duplicate each conditional input n_samples times
|
161 |
+
img_contexts[i] = images[i].repeat_interleave(config.n_samples, dim=0)
|
162 |
+
elif config.data_type == 'folder-img':
|
163 |
+
assert autoencoder is not None, "Autoencoder must be provided for folder-img data type"
|
164 |
+
# Duplicate each conditional input n_samples times
|
165 |
+
duplicated_batch = images.repeat_interleave(config.n_samples, dim=0)
|
166 |
+
img_contexts[i] = autoencoder.encode_moments(duplicated_batch)
|
167 |
+
|
168 |
+
# Padding the latents experiment
|
169 |
+
# duplicated_batch = images.repeat_interleave(config.n_samples, dim=0)
|
170 |
+
# intermediate_latents = autoencoder.encode_moments(duplicated_batch)
|
171 |
+
# padded_latents = torch.nn.functional.pad(intermediate_latents, (8, 8, 8, 8), mode='reflect')
|
172 |
+
# img_contexts[i] = padded_latents
|
173 |
+
|
174 |
+
return img_contexts, processed_filenames
|
175 |
+
|
176 |
+
def unpreprocess(v): # to B C H W and [0, 1]
|
177 |
+
v = 0.5 * (v + 1.)
|
178 |
+
v.clamp_(0., 1.)
|
179 |
+
return v
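# Inverse of the Normalize(mean=0.5, std=0.5) preprocessing: maps model outputs from [-1, 1] back to [0, 1].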
|
180 |
+
|
181 |
+
|
182 |
+
def set_seed(seed: int):
|
183 |
+
random.seed(seed)
|
184 |
+
np.random.seed(seed)
|
185 |
+
torch.manual_seed(seed)
|
186 |
+
torch.cuda.manual_seed_all(seed)
|
187 |
+
|
188 |
+
|
189 |
+
|
190 |
+
def evaluate(config):
|
191 |
+
if config.get('benchmark', False):
|
192 |
+
torch.backends.cudnn.benchmark = True
|
193 |
+
torch.backends.cudnn.deterministic = False
|
194 |
+
|
195 |
+
# Create output directory once at the start
|
196 |
+
os.makedirs(config.output_path, exist_ok=True)
|
197 |
+
# Create a directory for each modality if we are saving as pngs
|
198 |
+
if config.save_as == 'pngs':
|
199 |
+
for modality in config.generate_modalities:
|
200 |
+
os.makedirs(os.path.join(config.output_path, modality), exist_ok=True)
|
201 |
+
|
202 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
203 |
+
set_seed(config.seed)
|
204 |
+
|
205 |
+
config = ml_collections.FrozenConfigDict(config)
|
206 |
+
utils.set_logger(log_level='info')
|
207 |
+
|
208 |
+
_betas = stable_diffusion_beta_schedule()
|
209 |
+
N = len(_betas)
|
210 |
+
|
211 |
+
nnet = utils.get_nnet(**config.nnet)
|
212 |
+
logging.info(f'load nnet from {config.nnet_path}')
|
213 |
+
nnet.load_state_dict(torch.load(config.nnet_path, map_location='cpu'))
|
214 |
+
nnet.to(device)
|
215 |
+
nnet.eval()
|
216 |
+
|
217 |
+
if config.data_type == 'lmdb':
|
218 |
+
# Edit the dataset path to the data path from the command line arguments
|
219 |
+
dataset_config = ml_collections.ConfigDict(config.to_dict())
|
220 |
+
dataset_config.dataset.path = config.data_path
|
221 |
+
|
222 |
+
# Always return the filename
|
223 |
+
dataset_config.dataset.return_filename = True
|
224 |
+
|
225 |
+
dataset = get_dataset(**dataset_config.dataset)
|
226 |
+
# TODO: This is not intuitive. Split is train but it is returning the test set. See datasets.py
|
227 |
+
test_dataset = dataset.get_split(split='train', labeled=False)
|
228 |
+
# Create a generator with fixed seed for reproducible shuffling
|
229 |
+
g = torch.Generator()
|
230 |
+
g.manual_seed(config.seed) # Using the same seed as set earlier in the code
|
231 |
+
dataloader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=True, drop_last=False,
|
232 |
+
num_workers=8, pin_memory=True, persistent_workers=True, generator=g)
|
233 |
+
|
234 |
+
elif config.data_type == 'folder-img':
|
235 |
+
print("config.data_path", config.data_path)
|
236 |
+
|
237 |
+
if config.resolution >= MODEL_RESOLUTION:
|
238 |
+
transform = standard_transforms.Compose([
|
239 |
+
standard_transforms.CenterCrop(MODEL_RESOLUTION),
|
240 |
+
standard_transforms.ToTensor(),
|
241 |
+
standard_transforms.Normalize(mean=(0.5,), std=(0.5,)),
|
242 |
+
])
|
243 |
+
else:
|
244 |
+
padding_4sides = (MODEL_RESOLUTION - config.resolution) // 2
|
245 |
+
transform = standard_transforms.Compose([
|
246 |
+
standard_transforms.CenterCrop(config.resolution),
|
247 |
+
standard_transforms.ToTensor(),
|
248 |
+
torch.nn.ReflectionPad2d(padding_4sides),
|
249 |
+
standard_transforms.Normalize(mean=(0.5,), std=(0.5,)),
|
250 |
+
])
|
251 |
+
dataset = CustomImageDataset(config.data_path, transform=transform)
|
252 |
+
dataloader = DataLoader(dataset, batch_size=config.batch_size, shuffle=False, drop_last=False,
|
253 |
+
num_workers=8, pin_memory=True, persistent_workers=True)
|
254 |
+
else:
|
255 |
+
raise ValueError(f"Invalid data type: {config.data_type}. Must be one of ['lmdb', 'folder-img']")
|
256 |
+
|
257 |
+
autoencoder = libs.autoencoder.get_model(**config.autoencoder)
|
258 |
+
autoencoder.to(device)
|
259 |
+
|
260 |
+
|
261 |
+
@profiler.profile
|
262 |
+
def split_joint(x, z_imgs, config):
|
263 |
+
"""
|
264 |
+
Input:
|
265 |
+
x: (B, C, H, W)
|
266 |
+
is only the modalities that are being denoised
|
267 |
+
z_imgs: (M, B, C, H, W)
|
268 |
+
the original img_latents for all modalities
|
269 |
+
(but we only use the ones for the modalities that are being denoised)
|
270 |
+
config: config
|
271 |
+
|
272 |
+
First, split the input into the modalities into correct shape
|
273 |
+
Second, return a full list of the modalities,
|
274 |
+
including the ones being conditioned on and the ones being ignored.
|
275 |
+
|
276 |
+
Returns list of all modalities (some are denoised, some are conditioned on, some are ignored)
|
277 |
+
|
278 |
+
"""
|
279 |
+
|
280 |
+
C, H, W = config.z_shape
|
281 |
+
z_dim = C * H * W
|
282 |
+
z_generated = x.split([z_dim] * len(config.generate_modalities), dim=1)
|
283 |
+
z_generated = {modality: einops.rearrange(z_i, 'B (C H W) -> B C H W', C=C, H=H, W=W)
|
284 |
+
for z_i, modality in zip(z_generated, config.generate_modalities)}
|
285 |
+
|
286 |
+
z = []
|
287 |
+
for i, modality in enumerate(config.modalities):
|
288 |
+
# Modalities that are being denoised
|
289 |
+
if modality in config.generate_modalities:
|
290 |
+
z.append(z_generated[modality])
|
291 |
+
# Modalities that are being conditioned on
|
292 |
+
elif modality in config.condition_modalities:
|
293 |
+
z.append(z_imgs[i])
|
294 |
+
# Modalities that are ignored
|
295 |
+
else:
|
296 |
+
z.append(torch.randn(x.shape[0], C, H, W, device=device))
|
297 |
+
|
298 |
+
return z
|
299 |
+
|
300 |
+
|
301 |
+
@profiler.profile
|
302 |
+
def combine_joint(z):
|
303 |
+
"""
|
304 |
+
Input:
|
305 |
+
z: list of ONLY the modalities that are being denoised
|
306 |
+
Returns:
|
307 |
+
z: (B, C * H * W)
|
308 |
+
"""
|
309 |
+
z = torch.concat([einops.rearrange(z_i, 'B C H W -> B (C H W)') for z_i in z], dim=-1)
|
310 |
+
return z
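# combine_joint is the inverse of split_joint restricted to the generated modalities:
# e.g. with z_shape=(4, 32, 32) each generated modality contributes a 4096-dim slice,
# so two generated modalities give a (B, 8192) tensor for the solver.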
|
311 |
+
|
312 |
+
@torch.cuda.amp.autocast()
|
313 |
+
@profiler.profile
|
314 |
+
def encode(_batch):
|
315 |
+
return autoencoder.encode(_batch)
|
316 |
+
|
317 |
+
@torch.cuda.amp.autocast()
|
318 |
+
@profiler.profile
|
319 |
+
def decode(_batch):
|
320 |
+
return autoencoder.decode(_batch)
|
321 |
+
|
322 |
+
def get_data_generator():
|
323 |
+
# Run single epoch
|
324 |
+
for data in tqdm(dataloader, desc='epoch'):
|
325 |
+
yield data
|
326 |
+
|
327 |
+
logging.info("Num of modalities: %d", config.num_modalities)
|
328 |
+
logging.info("Num of images in dataloader: %d", len(dataloader))
|
329 |
+
logging.info("Generate modalities: %s", config.generate_modalities)
|
330 |
+
logging.info("Condition modalities: %s", config.condition_modalities)
|
331 |
+
logging.info("Condition modalities mask: %s", config.condition_modalities_mask)
|
332 |
+
logging.info("Generate modalities mask: %s", config.generate_modalities_mask)
|
333 |
+
logging.info(f'N={N}')
|
334 |
+
|
335 |
+
|
336 |
+
@profiler.profile
|
337 |
+
def run_nnet(x, t, z_imgs):
|
338 |
+
|
339 |
+
timesteps = [t if mask else torch.zeros_like(t) for mask in config.generate_modalities_mask]
|
340 |
+
|
341 |
+
# ==== EXPAND TO ALL MODALITIES ====
|
342 |
+
z = split_joint(x, z_imgs, config=config)
|
343 |
+
# z = {modality1: z_generated_modality1, modality2: z_conditioned_modality2, ...}
|
344 |
+
|
345 |
+
# == DEBUG CODE: Decode, unprocess, and save both modalities side by side
|
346 |
+
# z_decoded_1 = decode(z[0])
|
347 |
+
# z_decoded_2 = decode(z[1])
|
348 |
+
# z_decoded_1 = unpreprocess(z_decoded_1)
|
349 |
+
# z_decoded_2 = unpreprocess(z_decoded_2)
|
350 |
+
# z_decoded_combined = torch.cat([z_decoded_1, z_decoded_2], dim=-1) # Concatenate along width dimension
|
351 |
+
# print(f"saving image z_decoded_combined_{t}.png")
|
352 |
+
# save_image(z_decoded_combined, os.path.join(config.output_path, f"z_supeeerdecoded_{t}.png"))
|
353 |
+
# == DEBUG CODE END ==
|
354 |
+
|
355 |
+
"""
|
356 |
+
nnet expects:
|
357 |
+
- z: (M, B, C, H, W)
|
358 |
+
- t_imgs: (M, B)
|
359 |
+
where M is the number of modalities.
|
360 |
+
|
361 |
+
That is, z should be a list of M batches, each batch corresponding to a modality.
|
362 |
+
E.g. num_modalities(M)=3, batch_size(B)=16, z_shape(C, H, W)=(4, 32, 32) ->
|
363 |
+
z = [(16, 4, 32, 32), (16, 4, 32, 32), (16, 4, 32, 32)]
|
364 |
+
t_imgs = [(16,), (16,), (16,)]
|
365 |
+
"""
|
366 |
+
|
367 |
+
z_out = nnet(z, t_imgs=timesteps)
|
368 |
+
|
369 |
+
# ==== SELECT ONLY THE GENERATED MODALITIES for the denoising process ====
|
370 |
+
z_out_generated = [z_out[i]
|
371 |
+
for i, modality in enumerate(config.modalities)
|
372 |
+
if modality in config.generate_modalities]
|
373 |
+
|
374 |
+
x_out = combine_joint(z_out_generated)
|
375 |
+
|
376 |
+
if config.sample.scale == 0.:
|
377 |
+
return x_out
|
378 |
+
|
379 |
+
return x_out # TODO: Implement classifier-free guidance if there is time
|
380 |
+
|
381 |
+
@profiler.profile
|
382 |
+
def sample_fn(z_imgs, **kwargs):
|
383 |
+
# Calculate effective batch size
|
384 |
+
effective_batch_size = z_imgs[0].shape[0]
|
385 |
+
|
386 |
+
# Generate random initial noise for the modalities being generated/denoised
|
387 |
+
_z_init = torch.randn(len(config.generate_modalities), effective_batch_size, *z_imgs[0].shape[1:], device=device)
|
388 |
+
|
389 |
+
_x_init = combine_joint(_z_init)
|
390 |
+
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=torch.tensor(_betas, device=device).float())
|
391 |
+
|
392 |
+
@profiler.profile
|
393 |
+
def model_fn(x, t_continuous):
|
394 |
+
t = t_continuous * N
|
395 |
+
return run_nnet(x, t, z_imgs)
|
396 |
+
|
397 |
+
dpm_solver = DPM_Solver(model_fn, noise_schedule, predict_x0=True, thresholding=False)
|
398 |
+
with torch.no_grad():
|
399 |
+
with torch.autocast(device_type=device):
|
400 |
+
start_time = time.time()
|
401 |
+
x = dpm_solver.sample(_x_init, steps=config.sample.sample_steps, eps=1. / N, T=1.)
|
402 |
+
end_time = time.time()
|
403 |
+
print(f'\ngenerate {config.batch_size} samples with {config.sample.sample_steps} steps takes {end_time - start_time:.2f}s')
|
404 |
+
|
405 |
+
_zs = split_joint(x, z_imgs, config=config)
|
406 |
+
|
407 |
+
# Replace the conditional modalities with the original images
|
408 |
+
for i, mask in enumerate(config.condition_modalities_mask):
|
409 |
+
if mask:
|
410 |
+
_zs[i] = z_imgs[i]
|
411 |
+
|
412 |
+
return _zs
|
413 |
+
|
414 |
+
|
415 |
+
data_generator = get_data_generator()
|
416 |
+
for idx_batch, batch in enumerate(data_generator):
|
417 |
+
|
418 |
+
batch_start_time = time.time()
|
419 |
+
|
420 |
+
# Unpack the batch into images and filenames
|
421 |
+
original_images, original_filenames = batch
|
422 |
+
|
423 |
+
# print(filenames)
|
424 |
+
|
425 |
+
# Track data loading and preprocessing time
|
426 |
+
preprocess_start = time.time()
|
427 |
+
images = tree_map(lambda x: x.to(device), original_images)
|
428 |
+
# In addition to preparing the contexts (returns mean and variance),
|
429 |
+
# we need to actually sample the values from the distribution
|
430 |
+
img_contexts, filenames = prepare_contexts(config, images, original_filenames, device=device, autoencoder=autoencoder)
|
431 |
+
z_imgs = torch.stack([autoencoder.sample(img_context) for img_context in img_contexts])
|
432 |
+
preprocess_time = time.time() - preprocess_start
|
433 |
+
|
434 |
+
# Track sampling time
|
435 |
+
sample_start = time.time()
|
436 |
+
_zs = sample_fn(z_imgs)
|
437 |
+
sample_time = time.time() - sample_start
|
438 |
+
|
439 |
+
# Track decoding time
|
440 |
+
decode_start = time.time()
|
441 |
+
samples_unstacked = [unpreprocess(decode(_z)) for _z in _zs]
|
442 |
+
|
443 |
+
# Crop back to input resolution if it is smaller than MODEL_RESOLUTION
|
444 |
+
if config.resolution < MODEL_RESOLUTION:
|
445 |
+
samples_unstacked = [standard_transforms.functional.center_crop(sample, output_size=config.resolution)
|
446 |
+
for sample in samples_unstacked]
|
447 |
+
|
448 |
+
samples = torch.stack(samples_unstacked, dim=0)
|
449 |
+
decode_time = time.time() - decode_start
|
450 |
+
|
451 |
+
# Track saving time
|
452 |
+
save_start = time.time()
|
453 |
+
|
454 |
+
if config.save_as == 'grid':
|
455 |
+
|
456 |
+
b = samples.shape[1] # batch size
|
457 |
+
# Properly interleave samples from all modalities
|
458 |
+
# For each sample index, get all modalities before moving to next sample
|
459 |
+
samples = torch.stack([samples[j, i] for i in range(b) for j in range(config.nnet.num_modalities)]).view(-1, *samples.shape[2:])
|
460 |
+
# If the number of modalities is 3 then we plot in 9 columns
|
461 |
+
n_cols = 9 if config.nnet.num_modalities == 3 else 8
|
462 |
+
samples = make_grid(samples, n_cols)
|
463 |
+
save_path = os.path.join(config.output_path, f'grid_{idx_batch}.png')
|
464 |
+
save_image(samples, save_path)
|
465 |
+
|
466 |
+
|
467 |
+
# plot_real_images = '/home/s2254242/projects/pangaea_terramind/data/test_set_1/test' # We want to plot into a grid_real_images_{idx_batch}.png the real images
|
468 |
+
plot_real_images = ''
|
469 |
+
|
470 |
+
if plot_real_images != '':
|
471 |
+
# Load real images from files
|
472 |
+
real_images_list = []
|
473 |
+
for filename in original_filenames:
|
474 |
+
for modality in config.modalities:
|
475 |
+
img_path = os.path.join(plot_real_images, modality, f"{filename}.png")
|
476 |
+
img = Image.open(img_path).convert("RGB")
|
477 |
+
img_tensor = standard_transforms.ToTensor()(img)
|
478 |
+
real_images_list.append(img_tensor)
|
479 |
+
|
480 |
+
# Stack and create grid
|
481 |
+
real_images = torch.stack(real_images_list)
|
482 |
+
real_grid = make_grid(real_images, n_cols)
|
483 |
+
real_save_path = os.path.join(config.output_path, f'grid_real_{idx_batch}.png')
|
484 |
+
save_image(real_grid, real_save_path)
|
485 |
+
|
486 |
+
elif config.save_as == 'pngs':
|
487 |
+
# Define a helper function to save a single image
|
488 |
+
def save_single_image(args):
|
489 |
+
modality_idx, modality, b_idx = args
|
490 |
+
filename = filenames[b_idx] if isinstance(filenames, list) else filenames
|
491 |
+
save_path = os.path.join(os.path.join(config.output_path, modality), f"{filename}.png")
|
492 |
+
save_image(samples[modality_idx][b_idx], save_path)
|
493 |
+
|
494 |
+
# Create a list of all save operations needed
|
495 |
+
save_tasks = []
|
496 |
+
for i, modality in enumerate(config.modalities):
|
497 |
+
if modality in config.generate_modalities:
|
498 |
+
modality_dir = os.path.join(config.output_path, modality)
|
499 |
+
for b_idx in range(samples[i].shape[0]):
|
500 |
+
save_tasks.append((i, modality, b_idx))
|
501 |
+
|
502 |
+
# Use ThreadPoolExecutor to parallelize the saving process
|
503 |
+
max_workers = min(16, len(save_tasks)) # Limit to 16 threads max
|
504 |
+
if max_workers > 0: # Only create pool if there are tasks
|
505 |
+
print(f"Saving {len(save_tasks)} images using {max_workers} threads...")
|
506 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
507 |
+
list(tqdm(executor.map(save_single_image, save_tasks), total=len(save_tasks), desc="Saving images"))
|
508 |
+
|
509 |
+
elif config.data_type == 'folder-img':
|
510 |
+
# Get indices for all modalities we want to save
|
511 |
+
save_modalities = ['s1_rtc', 's2_l2a']
|
512 |
+
append_real_from_paths = []  # e.g. ['data/pastis_pngs/sar/'] to append matching real images next to the samples
|
513 |
+
modality_indices = [config.modalities.index(m) for m in save_modalities]
|
514 |
+
|
515 |
+
for i in range(min(config.batch_size, len(filenames))):
|
516 |
+
# Stack the samples from different modalities horizontally
|
517 |
+
concat_samples = torch.cat([samples[idx, i] for idx in modality_indices], dim=2)
|
518 |
+
|
519 |
+
# Append real images from specified paths
|
520 |
+
real_images = []
|
521 |
+
for real_path in append_real_from_paths:
|
522 |
+
real_img_path = os.path.join(real_path, filenames[i])
|
523 |
+
real_img = Image.open(real_img_path).convert("RGB")
|
524 |
+
real_img_tensor = standard_transforms.ToTensor()(real_img)
|
525 |
+
real_images.append(real_img_tensor)
|
526 |
+
|
527 |
+
real_images_tensor = torch.cat(real_images, dim=2) if len(real_images) > 1 else real_images[0]
|
528 |
+
concat_samples = torch.cat([concat_samples, real_images_tensor.to(device)], dim=2)
|
529 |
+
|
530 |
+
save_path = os.path.join(config.output_path, filenames[i])
|
531 |
+
save_image(concat_samples, save_path)
|
532 |
+
|
533 |
+
save_time = time.time() - save_start
|
534 |
+
|
535 |
+
batch_total_time = time.time() - batch_start_time
|
536 |
+
|
537 |
+
print(f'\nBatch {idx_batch} timing:')
|
538 |
+
print(f' Preprocessing: {preprocess_time:.2f}s ({preprocess_time/batch_total_time*100:.1f}%)')
|
539 |
+
print(f' Sampling: {sample_time:.2f}s ({sample_time/batch_total_time*100:.1f}%)')
|
540 |
+
print(f' Decoding: {decode_time:.2f}s ({decode_time/batch_total_time*100:.1f}%)')
|
541 |
+
print(f' Saving: {save_time:.2f}s ({save_time/batch_total_time*100:.1f}%)')
|
542 |
+
print(f' Total: {batch_total_time:.2f}s')
|
543 |
+
|
544 |
+
print(f'\nGPU memory usage: {torch.cuda.max_memory_reserved() / 1024 ** 3:.2f} GB')
|
545 |
+
print(f'\nresults are saved in {os.path.join(config.output_path)} :)')
|
546 |
+
|
547 |
+
# After processing, display the profiling summary
|
548 |
+
if idx_batch % 5 == 0 or idx_batch == len(dataloader) - 1:
|
549 |
+
profiler.summary()
|
550 |
+
|
551 |
+
|
552 |
+
from absl import flags
|
553 |
+
from absl import app
|
554 |
+
from ml_collections import config_flags
|
555 |
+
import os
|
556 |
+
|
557 |
+
|
558 |
+
FLAGS = flags.FLAGS
|
559 |
+
config_flags.DEFINE_config_file(
|
560 |
+
"config", None, "Configuration.", lock_config=False)
|
561 |
+
flags.DEFINE_string("data_path", None, "Path to the data")
|
562 |
+
flags.DEFINE_string("data_type", 'lmdb', "Type of data to load (lmdb, folder-img)")
|
563 |
+
flags.DEFINE_string("nnet_path", None, "The nnet to evaluate.")
|
564 |
+
flags.DEFINE_string("output_path", None, "The path to save the generated images")
|
565 |
+
flags.DEFINE_integer("n_mod", None, "Number of modalities")
|
566 |
+
flags.DEFINE_integer("n_samples", 1, "The number of samples to generate with the same condition")
|
567 |
+
flags.DEFINE_string("generate", None, "Comma-separated list of modalities to generate (s2_l1c,s2_l2a,s1_rtc,dem)")
|
568 |
+
flags.DEFINE_string("condition", None, "Comma-separated list of modalities to condition on (s2_l1c,s2_l2a,s1_rtc,dem)")
|
569 |
+
flags.DEFINE_string("save_as", 'grid', "How to save the generated images (grid, pngs)")
|
570 |
+
flags.DEFINE_integer("resolution", 256, "The resolution of the images to generate")
|
571 |
+
flags.DEFINE_integer("seed", None, "Random seed for reproducibility (overrides config seed)")
|
572 |
+
|
573 |
+
|
574 |
+
def main(argv):
|
575 |
+
config = FLAGS.config
|
576 |
+
config.nnet_path = FLAGS.nnet_path
|
577 |
+
config.data_path = FLAGS.data_path
|
578 |
+
config.save_as = FLAGS.save_as
|
579 |
+
config.n_samples = FLAGS.n_samples if FLAGS.n_samples else 1
|
580 |
+
config.resolution = FLAGS.resolution
|
581 |
+
|
582 |
+
# Override seed if provided from command line
|
583 |
+
if FLAGS.seed is not None:
|
584 |
+
config.seed = FLAGS.seed
|
585 |
+
|
586 |
+
# batch_size controls the number of unique conditional images we use
|
587 |
+
config.batch_size = 6
|
588 |
+
|
589 |
+
config.modalities = MODALITIES[FLAGS.n_mod]
|
590 |
+
|
591 |
+
if FLAGS.generate is None:
|
592 |
+
raise ValueError("--generate flag is mandatory")
|
593 |
+
|
594 |
+
# Parse generate and condition modalities
|
595 |
+
config.generate_modalities = FLAGS.generate.split(',')
|
596 |
+
config.condition_modalities = FLAGS.condition.split(',') if FLAGS.condition else []
|
597 |
+
|
598 |
+
# Sort the modalities by the order of the config.modalities
|
599 |
+
config.generate_modalities = sorted(config.generate_modalities, key=lambda x: config.modalities.index(x))
|
600 |
+
config.condition_modalities = sorted(config.condition_modalities, key=lambda x: config.modalities.index(x))
|
601 |
+
|
602 |
+
config.generate_modalities_mask = [mod in config.generate_modalities for mod in config.modalities]
|
603 |
+
config.condition_modalities_mask = [mod in config.condition_modalities for mod in config.modalities]
|
604 |
+
|
605 |
+
# Validate modalities
|
606 |
+
valid_modalities = {'s2_l1c', 's2_l2a', 's1_rtc', 'dem'}
|
607 |
+
for mod in config.generate_modalities + config.condition_modalities:
|
608 |
+
if mod not in valid_modalities:
|
609 |
+
raise ValueError(f"Invalid modality: {mod}. Must be one of {valid_modalities}")
|
610 |
+
|
611 |
+
# Check that generate and condition modalities don't overlap
|
612 |
+
if set(config.generate_modalities) & set(config.condition_modalities):
|
613 |
+
raise ValueError("Generate and condition modalities must be different")
|
614 |
+
|
615 |
+
if FLAGS.data_type == 'lmdb':
|
616 |
+
# Check that there exists a data.mdb and a lock.mdb in the data path
|
617 |
+
if not os.path.exists(os.path.join(config.data_path, 'data.mdb')):
|
618 |
+
raise ValueError(f"data.mdb does not exist in {config.data_path}")
|
619 |
+
if not os.path.exists(os.path.join(config.data_path, 'lock.mdb')):
|
620 |
+
raise ValueError(f"lock.mdb does not exist in {config.data_path}")
|
621 |
+
elif FLAGS.data_type == 'folder-img':
|
622 |
+
# raise NotImplementedError("Folder-img data type not implemented")
|
623 |
+
pass
|
624 |
+
else:
|
625 |
+
raise ValueError(f"Invalid data type: {FLAGS.data_type}. Must be one of ['lmdb', 'folder-img']")
|
626 |
+
config.data_type = FLAGS.data_type
|
627 |
+
|
628 |
+
assert config.nnet.num_modalities == FLAGS.n_mod, "Number of modalities in the nnet must match the number of modalities in the command line arguments"
|
629 |
+
config.num_modalities = FLAGS.n_mod
|
630 |
+
|
631 |
+
# Format the output path based on conditions and modalities
|
632 |
+
clean_generate = [mod.replace('_', '') for mod in config.generate_modalities]
|
633 |
+
if config.condition_modalities:
|
634 |
+
clean_condition = [mod.replace('_', '') for mod in config.condition_modalities]
|
635 |
+
output_dir = f"condition_{'_'.join(clean_condition)}_generate_{'_'.join(clean_generate)}_{config.n_samples}samples"
|
636 |
+
else:
|
637 |
+
output_dir = f"generate_{'_'.join(clean_generate)}_{config.n_samples}samples"
|
638 |
+
|
639 |
+
if config.save_as == 'grid':
|
640 |
+
config.output_path = os.path.join(FLAGS.output_path, 'grids', output_dir)
|
641 |
+
else:
|
642 |
+
config.output_path = os.path.join(FLAGS.output_path, output_dir)
|
643 |
+
|
644 |
+
evaluate(config)
|
645 |
+
|
646 |
+
# Print final profiling summary
|
647 |
+
print("\n===== FINAL PROFILING SUMMARY =====")
|
648 |
+
profiler.summary()
|
649 |
+
|
650 |
+
|
651 |
+
if __name__ == "__main__":
|
652 |
+
app.run(main)
|
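
Note (not part of the commit): a minimal sketch of how the sampling entry point above could be launched from Python, using only the flags defined in this file; the checkpoint, data and output paths are placeholders, and n_mod=4 assumes the four-modality setup used by the Rome config added in this commit.

    # sketch: invoke sample_n_triffuser.py with the flags defined above
    import subprocess

    subprocess.run([
        "python", "sample_n_triffuser.py",
        "--config=configs/majortom/discrete/rome_dems1s2s2_cop_gen_beta.py",  # config added in this commit
        "--nnet_path=models/cop_gen_beta.pth",   # placeholder checkpoint path
        "--data_path=data/rome_lmdb",            # placeholder LMDB dir (must contain data.mdb and lock.mdb)
        "--data_type=lmdb",
        "--output_path=out_images",
        "--n_mod=4",                             # assumed: 4 modalities (s2_l1c, s2_l2a, s1_rtc, dem)
        "--generate=s2_l2a",
        "--condition=s1_rtc",
        "--save_as=grid",
    ], check=True)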
src/COP-GEN-Beta/scripts/download_rome.sh
ADDED
@@ -0,0 +1,18 @@
#!/bin/bash

DATA_DIR="./data/majorTOM"
START_DATE="2017-01-01"
END_DATE="2025-01-01"
SOURCES=("Core-S2L2A" "Core-S2L1C" "Core-S1RTC" "Core-DEM")

python3 majortom/download_world.py \
    --data-dir $DATA_DIR \
    --sources "${SOURCES[@]}" \
    --start-date $START_DATE \
    --end-date $END_DATE \
    --cloud-cover 0 10 \
    --subset-name "rome" \
    --bbox 12.2 41.6 13.0 42.2 \
    --criteria "latest" \
    --n-samples 10 \
    --seed 42
src/COP-GEN-Beta/tools/extract_parquet.py
ADDED
@@ -0,0 +1,115 @@
import os
import sys
import pyarrow.parquet as pq
import pandas as pd
import json
from pathlib import Path
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Extract all content from a parquet file')
    parser.add_argument('--parquet-file', type=str, required=True,
                        help='Name of the parquet file to extract')
    parser.add_argument('--output-dir', type=str, default='./extracted_data',
                        help='Directory to save extracted data (default: ./extracted_data)')
    return parser.parse_args()

def extract_parquet_content(parquet_path, output_dir):
    """Extract all content from a parquet file and save it to the output directory"""
    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    print(f"Extracting data from {parquet_path} to {output_dir}")

    # Open the parquet file
    pf = pq.ParquetFile(parquet_path)
    print(f"File contains {pf.num_row_groups} row groups")

    # Process each row group
    for rg_idx in range(pf.num_row_groups):
        print(f"\nProcessing row group {rg_idx+1}/{pf.num_row_groups}")

        # Read the row group
        table = pf.read_row_group(rg_idx)
        df = table.to_pandas()

        # Create a directory for this row group
        if pf.num_row_groups > 1:
            rg_dir = output_dir / f"row_group_{rg_idx}"
        else:
            rg_dir = output_dir
        rg_dir.mkdir(exist_ok=True)

        # Get metadata to create more meaningful directory names if possible
        product_id = df['product_id'][0] if 'product_id' in df.columns else f"sample_{rg_idx}"
        grid_cell = df['grid_cell'][0] if 'grid_cell' in df.columns else ""

        # Create a more descriptive directory name if possible
        sample_dir = rg_dir / f"{grid_cell}_{product_id}" if grid_cell else rg_dir / product_id
        sample_dir.mkdir(exist_ok=True)

        # Extract and save metadata to JSON
        metadata = {}
        for col in df.columns:
            if df[col].dtype != 'object' or (len(df[col]) > 0 and not isinstance(df[col].iloc[0], bytes)):
                # Convert non-binary data to JSON-serializable format
                try:
                    if col == 'timestamp' and pd.api.types.is_datetime64_any_dtype(df[col]):
                        metadata[col] = df[col].iloc[0].strftime('%Y-%m-%d %H:%M:%S')
                    else:
                        value = df[col].iloc[0]
                        # Handle numpy types
                        if hasattr(value, 'item'):
                            metadata[col] = value.item()
                        else:
                            metadata[col] = value
                except Exception as e:
                    metadata[col] = f"Error converting: {str(e)}"

        # Save metadata
        with open(sample_dir / "metadata.json", "w") as f:
            json.dump(metadata, f, indent=2, default=str)

        # Extract and save binary data
        binary_columns = []
        for col in df.columns:
            if df[col].dtype == 'object' and len(df[col]) > 0 and isinstance(df[col].iloc[0], bytes):
                binary_columns.append(col)
                binary_data = df[col].iloc[0]

                # Determine file extension based on common column naming conventions
                if col == 'thumbnail':
                    extension = '.png'
                elif col.startswith('B') and col[1:].isdigit():  # Sentinel-2 bands
                    extension = '.tif'
                elif col in ['vv', 'vh']:  # Sentinel-1 bands
                    extension = '.tif'
                elif col == 'DEM':  # DEM data
                    extension = '.tif'
                elif col == 'cloud_mask':
                    extension = '.tif'
                else:
                    extension = '.bin'  # Generic binary data

                # Save binary data
                file_path = sample_dir / f"{col}{extension}"
                with open(file_path, "wb") as f:
                    f.write(binary_data)
                print(f"  Saved {col}{extension}, size: {len(binary_data)/1024:.1f} KB")

        print(f"  Extracted metadata and {len(binary_columns)} binary files to {sample_dir}")

def main():
    args = parse_args()
    parquet_path = Path(args.parquet_file)

    if not parquet_path.exists():
        print(f"Error: File {parquet_path} not found")
        sys.exit(1)

    # Extract all content
    extract_parquet_content(parquet_path, args.output_dir)
    print("\nExtraction complete!")

if __name__ == "__main__":
    main()
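
Note (not part of the commit): extract_parquet_content above can also be called directly from Python; the parquet path below is a placeholder.

    # sketch: programmatic use of the extractor defined above
    from pathlib import Path
    from tools.extract_parquet import extract_parquet_content

    sample_parquet = Path("data/majorTOM/rome/part_00000.parquet")  # placeholder path
    if sample_parquet.exists():
        # writes metadata.json plus one file per binary column (thumbnail, bands, DEM, ...)
        extract_parquet_content(sample_parquet, "./extracted_data")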
src/COP-GEN-Beta/tools/fid_score.py
ADDED
@@ -0,0 +1,260 @@
"""Calculates the Frechet Inception Distance (FID) to evaluate GANs

The FID metric calculates the distance between two distributions of images.
Typically, we have summary statistics (mean & covariance matrix) of one
of these distributions, while the 2nd distribution is given by a GAN.

When run as a stand-alone program, it compares the distribution of
images that are stored as PNG/JPEG at a specified location with a
distribution given by summary statistics (in pickle format).

The FID is calculated by assuming that X_1 and X_2 are the activations of
the pool_3 layer of the inception net for generated samples and real world
samples respectively.

See --help to see further details.

Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
of Tensorflow

Copyright 2018 Institute of Bioinformatics, JKU Linz

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import pathlib

import numpy as np
import torch
import torchvision.transforms as TF
from PIL import Image
from scipy import linalg
from torch.nn.functional import adaptive_avg_pool2d

try:
    from tqdm import tqdm
except ImportError:
    # If tqdm is not available, provide a mock version of it
    def tqdm(x):
        return x

from .inception import InceptionV3


IMAGE_EXTENSIONS = {'bmp', 'jpg', 'jpeg', 'pgm', 'png', 'ppm',
                    'tif', 'tiff', 'webp'}


class ImagePathDataset(torch.utils.data.Dataset):
    def __init__(self, files, transforms=None):
        self.files = files
        self.transforms = transforms

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        path = self.files[i]
        img = Image.open(path).convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)
        return img


def get_activations(files, model, batch_size=50, dims=2048, device='cpu', num_workers=8):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- files       : List of image files paths
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- device      : Device to run calculations
    -- num_workers : Number of parallel dataloader workers

    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = len(files)

    dataset = ImagePathDataset(files, transforms=TF.ToTensor())
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             drop_last=False,
                                             num_workers=num_workers)

    pred_arr = np.empty((len(files), dims))

    start_idx = 0

    for batch in tqdm(dataloader):
        batch = batch.to(device)

        with torch.no_grad():
            pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.size(2) != 1 or pred.size(3) != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred = pred.squeeze(3).squeeze(2).cpu().numpy()

        pred_arr[start_idx:start_idx + pred.shape[0]] = pred

        start_idx = start_idx + pred.shape[0]

    return pred_arr


def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.
    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1   : Numpy array containing the activations of a layer of the
               inception net (like returned by the function 'get_predictions')
               for generated samples.
    -- mu2   : The sample mean over activations, precalculated on a
               representative data set.
    -- sigma1: The covariance matrix over activations for generated samples.
    -- sigma2: The covariance matrix over activations, precalculated on a
               representative data set.

    Returns:
    --   : The Frechet Distance.
    """

    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return (diff.dot(diff) + np.trace(sigma1)
            + np.trace(sigma2) - 2 * tr_covmean)


def calculate_activation_statistics(files, model, batch_size=50, dims=2048,
                                    device='cpu', num_workers=8):
    """Calculation of the statistics used by the FID.
    Params:
    -- files       : List of image files paths
    -- model       : Instance of inception model
    -- batch_size  : The images numpy array is split into batches with
                     batch size batch_size. A reasonable batch size
                     depends on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- device      : Device to run calculations
    -- num_workers : Number of parallel dataloader workers

    Returns:
    -- mu    : The mean over samples of the activations of the pool_3 layer of
               the inception model.
    -- sigma : The covariance matrix of the activations of the pool_3 layer of
               the inception model.
    """
    act = get_activations(files, model, batch_size, dims, device, num_workers)
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma


def compute_statistics_of_path(path, model, batch_size, dims, device, num_workers=8):
    if path.endswith('.npz'):
        with np.load(path) as f:
            m, s = f['mu'][:], f['sigma'][:]
    else:
        path = pathlib.Path(path)
        files = sorted([file for ext in IMAGE_EXTENSIONS
                        for file in path.glob('*.{}'.format(ext))])
        m, s = calculate_activation_statistics(files, model, batch_size,
                                               dims, device, num_workers)

    return m, s


def save_statistics_of_path(path, out_path, device=None, batch_size=50, dims=2048, num_workers=8):
    if device is None:
        device = torch.device('cuda' if (torch.cuda.is_available()) else 'cpu')
    else:
        device = torch.device(device)
    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
    model = InceptionV3([block_idx]).to(device)
    m1, s1 = compute_statistics_of_path(path, model, batch_size, dims, device, num_workers)
    np.savez(out_path, mu=m1, sigma=s1)


def calculate_fid_given_paths(paths, device=None, batch_size=50, dims=2048, num_workers=8):
    """Calculates the FID of two paths"""
    if device is None:
        device = torch.device('cuda' if (torch.cuda.is_available()) else 'cpu')
    else:
        device = torch.device(device)

    for p in paths:
        if not os.path.exists(p):
            raise RuntimeError('Invalid path: %s' % p)

    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]

    model = InceptionV3([block_idx]).to(device)

    m1, s1 = compute_statistics_of_path(paths[0], model, batch_size,
                                        dims, device, num_workers)
    m2, s2 = compute_statistics_of_path(paths[1], model, batch_size,
                                        dims, device, num_workers)
    fid_value = calculate_frechet_distance(m1, s1, m2, s2)

    return fid_value
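
Note (not part of the commit): a minimal sketch of the two public entry points above; both folder paths are placeholders and must contain images with one of the extensions listed in IMAGE_EXTENSIONS.

    # sketch: cache reference statistics once, then score a folder of generated samples
    from tools.fid_score import save_statistics_of_path, calculate_fid_given_paths

    save_statistics_of_path("data/real_thumbnails", "workdir/fid_stats_real.npz")  # placeholder dirs
    fid = calculate_fid_given_paths(("workdir/fid_stats_real.npz", "out_images/generated"))
    print(f"FID: {fid:.2f}")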
src/COP-GEN-Beta/tools/inception.py
ADDED
@@ -0,0 +1,328 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

try:
    from torchvision.models.utils import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

# Inception weights ported to Pytorch from
# http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
FID_WEIGHTS_URL = 'https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth'  # noqa: E501


class InceptionV3(nn.Module):
    """Pretrained InceptionV3 network returning feature maps"""

    # Index of default block of inception to return,
    # corresponds to output of final average pooling
    DEFAULT_BLOCK_INDEX = 3

    # Maps feature dimensionality to their output blocks indices
    BLOCK_INDEX_BY_DIM = {
        64: 0,    # First max pooling features
        192: 1,   # Second max pooling features
        768: 2,   # Pre-aux classifier features
        2048: 3   # Final average pooling features
    }

    def __init__(self,
                 output_blocks=(DEFAULT_BLOCK_INDEX,),
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False,
                 use_fid_inception=True):
        """Build pretrained InceptionV3

        Parameters
        ----------
        output_blocks : list of int
            Indices of blocks to return features of. Possible values are:
                - 0: corresponds to output of first max pooling
                - 1: corresponds to output of second max pooling
                - 2: corresponds to output which is fed to aux classifier
                - 3: corresponds to output of final average pooling
        resize_input : bool
            If true, bilinearly resizes input to width and height 299 before
            feeding input to model. As the network without fully connected
            layers is fully convolutional, it should be able to handle inputs
            of arbitrary size, so resizing might not be strictly needed
        normalize_input : bool
            If true, scales the input from range (0, 1) to the range the
            pretrained Inception network expects, namely (-1, 1)
        requires_grad : bool
            If true, parameters of the model require gradients. Possibly useful
            for finetuning the network
        use_fid_inception : bool
            If true, uses the pretrained Inception model used in Tensorflow's
            FID implementation. If false, uses the pretrained Inception model
            available in torchvision. The FID Inception model has different
            weights and a slightly different structure from torchvision's
            Inception model. If you want to compute FID scores, you are
            strongly advised to set this parameter to true to get comparable
            results.
        """
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        if use_fid_inception:
            inception = fid_inception_v3()
        else:
            inception = _inception_v3(pretrained=True)

        # Block 0: input to maxpool1
        block0 = [
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1,
                inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a,
                inception.Mixed_7b,
                inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        for param in self.parameters():
            param.requires_grad = requires_grad

    def forward(self, inp):
        """Get Inception feature maps

        Parameters
        ----------
        inp : torch.autograd.Variable
            Input tensor of shape Bx3xHxW. Values are expected to be in
            range (0, 1)

        Returns
        -------
        List of torch.autograd.Variable, corresponding to the selected output
        block, sorted ascending by index
        """
        outp = []
        x = inp

        if self.resize_input:
            x = F.interpolate(x,
                              size=(299, 299),
                              mode='bilinear',
                              align_corners=False)

        if self.normalize_input:
            x = 2 * x - 1  # Scale from range (0, 1) to range (-1, 1)

        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in self.output_blocks:
                outp.append(x)

            if idx == self.last_needed_block:
                break

        return outp


def _inception_v3(*args, **kwargs):
    """Wraps `torchvision.models.inception_v3`

    Skips default weight initialization if supported by torchvision version.
    See https://github.com/mseitzer/pytorch-fid/issues/28.
    """
    try:
        version = tuple(map(int, torchvision.__version__.split('.')[:2]))
    except ValueError:
        # Just a caution against weird version strings
        version = (0,)

    if version >= (0, 6):
        kwargs['init_weights'] = False

    return torchvision.models.inception_v3(*args, **kwargs)


def fid_inception_v3():
    """Build pretrained Inception model for FID computation

    The Inception model for FID computation uses a different set of weights
    and has a slightly different structure than torchvision's Inception.

    This method first constructs torchvision's Inception and then patches the
    necessary parts that are different in the FID Inception model.
    """
    inception = _inception_v3(num_classes=1008,
                              aux_logits=False,
                              pretrained=False)
    inception.Mixed_5b = FIDInceptionA(192, pool_features=32)
    inception.Mixed_5c = FIDInceptionA(256, pool_features=64)
    inception.Mixed_5d = FIDInceptionA(288, pool_features=64)
    inception.Mixed_6b = FIDInceptionC(768, channels_7x7=128)
    inception.Mixed_6c = FIDInceptionC(768, channels_7x7=160)
    inception.Mixed_6d = FIDInceptionC(768, channels_7x7=160)
    inception.Mixed_6e = FIDInceptionC(768, channels_7x7=192)
    inception.Mixed_7b = FIDInceptionE_1(1280)
    inception.Mixed_7c = FIDInceptionE_2(2048)

    state_dict = load_state_dict_from_url(FID_WEIGHTS_URL, progress=True)
    inception.load_state_dict(state_dict)
    return inception


class FIDInceptionA(torchvision.models.inception.InceptionA):
    """InceptionA block patched for FID computation"""
    def __init__(self, in_channels, pool_features):
        super(FIDInceptionA, self).__init__(in_channels, pool_features)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)

        # Patch: Tensorflow's average pool does not use the padded zeros in
        # its average calculation
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
                                   count_include_pad=False)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)


class FIDInceptionC(torchvision.models.inception.InceptionC):
    """InceptionC block patched for FID computation"""
    def __init__(self, in_channels, channels_7x7):
        super(FIDInceptionC, self).__init__(in_channels, channels_7x7)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch7x7 = self.branch7x7_1(x)
        branch7x7 = self.branch7x7_2(branch7x7)
        branch7x7 = self.branch7x7_3(branch7x7)

        branch7x7dbl = self.branch7x7dbl_1(x)
        branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)

        # Patch: Tensorflow's average pool does not use the padded zeros in
        # its average calculation
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
                                   count_include_pad=False)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
        return torch.cat(outputs, 1)


class FIDInceptionE_1(torchvision.models.inception.InceptionE):
    """First InceptionE block patched for FID computation"""
    def __init__(self, in_channels):
        super(FIDInceptionE_1, self).__init__(in_channels)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch3x3 = self.branch3x3_1(x)
        branch3x3 = [
            self.branch3x3_2a(branch3x3),
            self.branch3x3_2b(branch3x3),
        ]
        branch3x3 = torch.cat(branch3x3, 1)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = [
            self.branch3x3dbl_3a(branch3x3dbl),
            self.branch3x3dbl_3b(branch3x3dbl),
        ]
        branch3x3dbl = torch.cat(branch3x3dbl, 1)

        # Patch: Tensorflow's average pool does not use the padded zeros in
        # its average calculation
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
                                   count_include_pad=False)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)


class FIDInceptionE_2(torchvision.models.inception.InceptionE):
    """Second InceptionE block patched for FID computation"""
    def __init__(self, in_channels):
        super(FIDInceptionE_2, self).__init__(in_channels)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch3x3 = self.branch3x3_1(x)
        branch3x3 = [
            self.branch3x3_2a(branch3x3),
            self.branch3x3_2b(branch3x3),
        ]
        branch3x3 = torch.cat(branch3x3, 1)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = [
            self.branch3x3dbl_3a(branch3x3dbl),
            self.branch3x3dbl_3b(branch3x3dbl),
        ]
        branch3x3dbl = torch.cat(branch3x3dbl, 1)

        # Patch: The FID Inception model uses max pooling instead of average
        # pooling. This is likely an error in this specific Inception
        # implementation, as other Inception models use average pooling here
        # (which matches the description in the paper).
        branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)
src/COP-GEN-Beta/tools/inspect_parquet.py
ADDED
@@ -0,0 +1,92 @@
import os
import sys
import pyarrow.parquet as pq
import pandas as pd
from pathlib import Path
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Extract information from a parquet file')
    parser.add_argument('--parquet-file', type=str, required=True,
                        help='Name of the parquet file in the current directory')
    parser.add_argument('--row-group', type=int, default=None,
                        help='Specific row group to extract (default: all row groups)')
    parser.add_argument('--sample-binary', action='store_true',
                        help='Print sample of binary content (first 100 bytes)')
    return parser.parse_args()

def main():
    args = parse_args()
    parquet_path = Path(args.parquet_file)

    if not parquet_path.exists():
        print(f"Error: File {parquet_path} not found")
        sys.exit(1)

    print(f"\n--- Analyzing parquet file: {parquet_path} ---\n")

    # Open the parquet file
    pf = pq.ParquetFile(parquet_path)

    # Print basic file information
    print(f"File size: {parquet_path.stat().st_size / (1024*1024):.2f} MB")
    print(f"Number of row groups: {pf.num_row_groups}")
    print(f"Number of rows: {pf.metadata.num_rows}")
    print(f"Number of columns: {len(pf.schema_arrow)}")

    # Print schema information
    print("\nSchema:")
    for i, field in enumerate(pf.schema_arrow):
        print(f"  {i+1}. {field.name}: {field.type}")

    # Process row groups
    row_groups = [args.row_group] if args.row_group is not None else range(pf.num_row_groups)

    for rg_idx in row_groups:
        if rg_idx >= pf.num_row_groups:
            print(f"Error: Row group {rg_idx} does not exist (max: {pf.num_row_groups-1})")
            continue

        print(f"\n--- Row Group {rg_idx} ---")
        # Get row group metadata
        rg_metadata = pf.metadata.row_group(rg_idx)
        print(f"Row count: {rg_metadata.num_rows}")

        # Read the row group
        table = pf.read_row_group(rg_idx)
        df = table.to_pandas()

        # Display information about each column
        print("\nColumn information:")
        for col_name in df.columns:
            col_data = df[col_name]
            dtype = col_data.dtype

            if dtype == 'object':
                # Check if it's binary data
                if len(col_data) > 0 and isinstance(col_data.iloc[0], bytes):
                    item_size = len(col_data.iloc[0])
                    print(f"  {col_name}: Binary data, size: {item_size / 1024:.2f} KB")

                    if args.sample_binary and item_size > 0:
                        print(f"    Sample (first 100 bytes): {col_data.iloc[0][:100]}")
                else:
                    # For non-binary object columns
                    print(f"  {col_name}: Object type, example: {col_data.iloc[0]}")
            else:
                # For numeric or other columns
                if col_data.size > 0:
                    print(f"  {col_name}: {dtype}, min: {col_data.min()}, max: {col_data.max()}, example: {col_data.iloc[0]}")
                else:
                    print(f"  {col_name}: {dtype}, empty column")

        # Print specific metadata fields for Major-TOM dataset
        if 'product_id' in df.columns:
            print(f"\nProduct ID: {df['product_id'].iloc[0]}")
        if 'grid_cell' in df.columns:
            print(f"Grid Cell: {df['grid_cell'].iloc[0]}")
        if 'timestamp' in df.columns:
            print(f"Timestamp: {df['timestamp'].iloc[0]}")

if __name__ == "__main__":
    main()
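
Note (not part of the commit): the same inspection can be done interactively with pyarrow; the file name below is a placeholder.

    # sketch: quick interactive look at a Major-TOM parquet file
    import pyarrow.parquet as pq

    pf = pq.ParquetFile("part_00000.parquet")        # placeholder path
    print(pf.num_row_groups, pf.metadata.num_rows)   # row groups and total rows
    print(pf.schema_arrow)                           # column names and types, as printed by the script above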
src/COP-GEN-Beta/tools/print_parquet_urls.py
ADDED
@@ -0,0 +1,31 @@
import pandas as pd
import pyarrow.parquet as pq
import argparse
from pathlib import Path

def parse_args():
    parser = argparse.ArgumentParser(description='Read metadata.parquet and print download URLs')
    parser.add_argument('--metadata-path', type=str, required=True,
                        help='Path to the metadata.parquet file')
    return parser.parse_args()

def main():
    args = parse_args()
    metadata_path = Path(args.metadata_path)

    # Read the parquet file
    print(f"Reading metadata from: {metadata_path}")
    df = pq.read_table(metadata_path).to_pandas()

    # Extract unique parquet URLs
    unique_urls = df['parquet_url'].unique()

    # Print the URLs
    print(f"\nFound {len(unique_urls)} unique parquet file URLs:")
    for url in unique_urls:
        print(url)

    print(f"\nTotal number of samples in metadata: {len(df)}")

if __name__ == "__main__":
    main()
src/COP-GEN-Beta/train_triffuser_discrete.py
ADDED
@@ -0,0 +1,408 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ml_collections
|
2 |
+
import torch
|
3 |
+
from torch import multiprocessing as mp
|
4 |
+
from datasets import get_dataset
|
5 |
+
from torchvision.utils import make_grid, save_image
|
6 |
+
import utils
|
7 |
+
import einops
|
8 |
+
from torch.utils._pytree import tree_map
|
9 |
+
import accelerate
|
10 |
+
from torch.utils.data import DataLoader
|
11 |
+
from tqdm.auto import tqdm
|
12 |
+
from dpm_solver_pp import NoiseScheduleVP, DPM_Solver
|
13 |
+
import tempfile
|
14 |
+
from tools.fid_score import calculate_fid_given_paths
|
15 |
+
from absl import logging
|
16 |
+
import builtins
|
17 |
+
import os
|
18 |
+
import wandb
|
19 |
+
import libs.autoencoder
|
20 |
+
import numpy as np
|
21 |
+
|
22 |
+
|
23 |
+
def stable_diffusion_beta_schedule(linear_start=0.00085, linear_end=0.0120, n_timestep=1000):
|
24 |
+
_betas = (
|
25 |
+
torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
|
26 |
+
)
|
27 |
+
return _betas.numpy()
|
28 |
+
|
29 |
+
|
30 |
+
def get_skip(alphas, betas):
|
31 |
+
N = len(betas) - 1
|
32 |
+
skip_alphas = np.ones([N + 1, N + 1], dtype=betas.dtype)
|
33 |
+
for s in range(N + 1):
|
34 |
+
skip_alphas[s, s + 1:] = alphas[s + 1:].cumprod()
|
35 |
+
skip_betas = np.zeros([N + 1, N + 1], dtype=betas.dtype)
|
36 |
+
for t in range(N + 1):
|
37 |
+
prod = betas[1: t + 1] * skip_alphas[1: t + 1, t]
|
38 |
+
skip_betas[:t, t] = (prod[::-1].cumsum())[::-1]
|
39 |
+
return skip_alphas, skip_betas
|
40 |
+
|
41 |
+
|
42 |
+
def stp(s, ts: torch.Tensor): # scalar tensor product
|
43 |
+
if isinstance(s, np.ndarray):
|
44 |
+
s = torch.from_numpy(s).type_as(ts)
|
45 |
+
extra_dims = (1,) * (ts.dim() - 1)
|
46 |
+
return s.view(-1, *extra_dims) * ts
|
47 |
+
|
48 |
+
|
49 |
+
def mos(a, start_dim=1): # mean of square
|
50 |
+
return a.pow(2).flatten(start_dim=start_dim).mean(dim=-1)
|
51 |
+
|
52 |
+
|
53 |
+
class Schedule(object): # discrete time
|
54 |
+
def __init__(self, _betas):
|
55 |
+
r""" _betas[0...999] = betas[1...1000]
|
56 |
+
for n>=1, betas[n] is the variance of q(xn|xn-1)
|
57 |
+
for n=0, betas[0]=0
|
58 |
+
"""
|
59 |
+
|
60 |
+
self._betas = _betas
|
61 |
+
self.betas = np.append(0., _betas)
|
62 |
+
self.alphas = 1. - self.betas
|
63 |
+
self.N = len(_betas)
|
64 |
+
|
65 |
+
assert isinstance(self.betas, np.ndarray) and self.betas[0] == 0
|
66 |
+
assert isinstance(self.alphas, np.ndarray) and self.alphas[0] == 1
|
67 |
+
assert len(self.betas) == len(self.alphas)
|
68 |
+
|
69 |
+
# skip_alphas[s, t] = alphas[s + 1: t + 1].prod()
|
70 |
+
self.skip_alphas, self.skip_betas = get_skip(self.alphas, self.betas)
|
71 |
+
self.cum_alphas = self.skip_alphas[0] # cum_alphas = alphas.cumprod()
|
72 |
+
self.cum_betas = self.skip_betas[0]
|
73 |
+
self.snr = self.cum_alphas / self.cum_betas
|
74 |
+
|
75 |
+
def tilde_beta(self, s, t):
|
76 |
+
return self.skip_betas[s, t] * self.cum_betas[s] / self.cum_betas[t]
|
77 |
+
|
78 |
+
def sample(self, x0, multi_modal=False): # sample from q(xn|x0), where n is uniform
|
79 |
+
if multi_modal:
|
80 |
+
n_list = []
|
81 |
+
eps_list = []
|
82 |
+
xn_list = []
|
83 |
+
for x0_i in x0:
|
84 |
+
n = np.random.choice(list(range(1, self.N + 1)), (len(x0_i),))
|
85 |
+
eps = torch.randn_like(x0_i)
|
86 |
+
xn = stp(self.cum_alphas[n] ** 0.5, x0_i) + stp(self.cum_betas[n] ** 0.5, eps)
|
87 |
+
n_list.append(torch.tensor(n, device=x0_i.device))
|
88 |
+
eps_list.append(eps)
|
89 |
+
xn_list.append(xn)
|
90 |
+
return n_list, eps_list, xn_list
|
91 |
+
else:
|
92 |
+
n = np.random.choice(list(range(1, self.N + 1)), (len(x0),))
|
93 |
+
eps = torch.randn_like(x0)
|
94 |
+
xn = stp(self.cum_alphas[n] ** 0.5, x0) + stp(self.cum_betas[n] ** 0.5, eps)
|
95 |
+
return torch.tensor(n, device=x0.device), eps, xn
|
96 |
+
|
97 |
+
def __repr__(self):
|
98 |
+
return f'Schedule({self.betas[:10]}..., {self.N})'
|
99 |
+
|
100 |
+
|
101 |
+
def LSimple(x0, nnet, schedule, multi_modal=False, **kwargs):
|
102 |
+
if multi_modal:
|
103 |
+
n_list, eps_list, xn_list = schedule.sample(x0, multi_modal=multi_modal) # n in {1, ..., 1000}
|
104 |
+
eps_pred = nnet(xn_list, n_list, **kwargs)
|
105 |
+
return sum(mos(n - np_) for n, np_ in zip(eps_list, eps_pred))
|
106 |
+
else:
|
107 |
+
n, eps, xn = schedule.sample(x0) # n in {1, ..., 1000}
|
108 |
+
eps_pred = nnet(xn, n, **kwargs)
|
109 |
+
return mos(eps - eps_pred)
|
110 |
+
|
111 |
+
|
112 |
+
def train(config):
|
113 |
+
if config.get('benchmark', False):
|
114 |
+
torch.backends.cudnn.benchmark = True
|
115 |
+
torch.backends.cudnn.deterministic = False
|
116 |
+
|
117 |
+
mp.set_start_method('spawn')
|
118 |
+
accelerator = accelerate.Accelerator()
|
119 |
+
device = accelerator.device
|
120 |
+
accelerate.utils.set_seed(config.seed, device_specific=True)
|
121 |
+
logging.info(f'Process {accelerator.process_index} using device: {device}')
|
122 |
+
|
123 |
+
config.mixed_precision = accelerator.mixed_precision
|
124 |
+
config = ml_collections.FrozenConfigDict(config)
|
125 |
+
|
126 |
+
assert config.train.batch_size % accelerator.num_processes == 0
|
127 |
+
mini_batch_size = config.train.batch_size // accelerator.num_processes
|
128 |
+
|
129 |
+
if accelerator.is_main_process:
|
130 |
+
os.makedirs(config.ckpt_root, exist_ok=True)
|
131 |
+
os.makedirs(config.sample_dir, exist_ok=True)
|
132 |
+
accelerator.wait_for_everyone()
|
133 |
+
if accelerator.is_main_process:
|
134 |
+
wandb.init(dir=os.path.abspath(config.workdir), project=f'uvit_{config.dataset.name}', config=config.to_dict(),
|
135 |
+
name=config.hparams, job_type='train', mode='offline')
|
136 |
+
utils.set_logger(log_level='info', fname=os.path.join(config.workdir, 'output.log'))
|
137 |
+
logging.info(config)
|
138 |
+
else:
|
139 |
+
utils.set_logger(log_level='error')
|
140 |
+
builtins.print = lambda *args: None
|
141 |
+
logging.info(f'Run on {accelerator.num_processes} devices')
|
142 |
+
|
143 |
+
dataset = get_dataset(**config.dataset)
|
144 |
+
assert os.path.exists(dataset.fid_stat)
|
145 |
+
train_dataset = dataset.get_split(split='train', labeled=config.train.mode == 'cond')
|
146 |
+
train_dataset_loader = DataLoader(train_dataset, batch_size=mini_batch_size, shuffle=True, drop_last=True,
|
147 |
+
num_workers=8, pin_memory=True, persistent_workers=True)
|
148 |
+
|
149 |
+
train_state = utils.initialize_train_state(config, device)
|
150 |
+
nnet, nnet_ema, optimizer, train_dataset_loader = accelerator.prepare(
|
151 |
+
train_state.nnet, train_state.nnet_ema, train_state.optimizer, train_dataset_loader)
|
152 |
+
lr_scheduler = train_state.lr_scheduler
|
153 |
+
train_state.resume(config.ckpt_root)
|
154 |
+
|
155 |
+
autoencoder = libs.autoencoder.get_model(config.autoencoder.pretrained_path)
|
156 |
+
autoencoder.to(device)
|
157 |
+
|
158 |
+
@ torch.cuda.amp.autocast()
|
159 |
+
def encode(_batch):
|
160 |
+
return autoencoder.encode(_batch)
|
161 |
+
|
162 |
+
@ torch.cuda.amp.autocast()
|
163 |
+
def decode(_batch):
|
164 |
+
return autoencoder.decode(_batch)
|
165 |
+
|
166 |
+
def get_data_generator():
|
167 |
+
while True:
|
168 |
+
for data in tqdm(train_dataset_loader, disable=not accelerator.is_main_process, desc='epoch'):
|
169 |
+
yield data
|
170 |
+
|
171 |
+
data_generator = get_data_generator()
|
172 |
+
|
173 |
+
_betas = stable_diffusion_beta_schedule()
|
174 |
+
_schedule = Schedule(_betas)
|
175 |
+
logging.info(f'use {_schedule}')
|
176 |
+
|
177 |
+
|
178 |
+
def train_step(_batch):
|
179 |
+
_metrics = dict()
|
180 |
+
optimizer.zero_grad()
|
181 |
+
if config.train.mode == 'uncond': # Multi-modal data. Sample each modality independently
|
182 |
+
if config.train.multi_modal:
|
183 |
+
_zs = [autoencoder.sample(modality) if 'feature' in config.dataset.name else encode(modality) for modality in _batch]
|
184 |
+
loss = LSimple(_zs, nnet, _schedule, multi_modal=config.train.multi_modal)
|
185 |
+
else:
|
186 |
+
_z = autoencoder.sample(_batch) if 'feature' in config.dataset.name else encode(_batch)
|
187 |
+
loss = LSimple(_z, nnet, _schedule)
|
188 |
+
elif config.train.mode == 'cond':
|
189 |
+
_z = autoencoder.sample(_batch[0]) if 'feature' in config.dataset.name else encode(_batch[0])
|
190 |
+
loss = LSimple(_z, nnet, _schedule, y=_batch[1])
|
191 |
+
else:
|
192 |
+
raise NotImplementedError(config.train.mode)
|
193 |
+
_metrics['loss'] = accelerator.gather(loss.detach()).mean()
|
194 |
+
accelerator.backward(loss.mean())
|
195 |
+
optimizer.step()
|
196 |
+
lr_scheduler.step()
|
197 |
+
train_state.ema_update(config.get('ema_rate', 0.9999))
|
198 |
+
train_state.step += 1
|
199 |
+
return dict(lr=train_state.optimizer.param_groups[0]['lr'], **_metrics)
|
200 |
+
|
201 |
+
def dpm_solver_sample(_n_samples, _sample_steps, **kwargs):
|
202 |
+
_z_init = torch.randn(_n_samples, *config.z_shape, device=device)
|
203 |
+
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=torch.tensor(_betas, device=device).float())
|
204 |
+
|
205 |
+
def model_fn(x, t_continuous):
|
206 |
+
t = t_continuous * _schedule.N
|
207 |
+
eps_pre = nnet_ema(x, t, **kwargs)
|
208 |
+
return eps_pre
|
209 |
+
|
210 |
+
dpm_solver = DPM_Solver(model_fn, noise_schedule, predict_x0=True, thresholding=False)
|
211 |
+
_z = dpm_solver.sample(_z_init, steps=_sample_steps, eps=1. / _schedule.N, T=1.)
|
212 |
+
return decode(_z)
|
213 |
+
|
214 |
+
def combine_joint(z):
|
215 |
+
z = torch.concat([einops.rearrange(z_i, 'B C H W -> B (C H W)') for z_i in z], dim=-1)
|
216 |
+
return z
|
217 |
+
|
218 |
+
def split_joint(x, n_modalities):
|
219 |
+
C, H, W = config.z_shape
|
220 |
+
z_dim = C * H * W
|
221 |
+
z = x.split([z_dim] * n_modalities, dim=1)
|
222 |
+
z = [einops.rearrange(z_i, 'B (C H W) -> B C H W', C=C, H=H, W=W) for z_i in z]
|
223 |
+
return z
|
224 |
+
|
225 |
+
def dpm_solver_sample_multi_modal(_n_modalities, _n_samples, _sample_steps, **kwargs):
|
226 |
+
"""here"""
|
227 |
+
|
228 |
+
_z_init = torch.randn(_n_modalities, _n_samples, *config.z_shape, device=device)
|
229 |
+
_z_init = combine_joint(_z_init)
|
230 |
+
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=torch.tensor(_betas, device=device).float())
|
231 |
+
|
232 |
+
def model_fn(x, t_continuous):
|
233 |
+
t = t_continuous * _schedule.N
|
234 |
+
|
235 |
+
timesteps = [t] * _n_modalities
|
236 |
+
z = split_joint(x, _n_modalities)
|
237 |
+
z_out = nnet_ema(z, t_imgs=timesteps)
|
238 |
+
x_out = combine_joint(z_out)
|
239 |
+
# eps_pre = nnet_ema(x, t, **kwargs)
|
240 |
+
return x_out
|
241 |
+
|
242 |
+
dpm_solver = DPM_Solver(model_fn, noise_schedule, predict_x0=True, thresholding=False)
|
243 |
+
_zs = dpm_solver.sample(_z_init, steps=_sample_steps, eps=1. / _schedule.N, T=1.)
|
244 |
+
_zs = split_joint(_zs, _n_modalities)
|
245 |
+
samples_unstacked = [decode(_z) for _z in _zs]
|
246 |
+
return samples_unstacked
|
247 |
+
|
248 |
+
def eval_step(n_samples, sample_steps):
|
249 |
+
logging.info(f'eval_step: n_samples={n_samples}, sample_steps={sample_steps}'
|
250 |
+
f'mini_batch_size={config.sample.mini_batch_size}')
|
251 |
+
|
252 |
+
def sample_fn(_n_samples):
|
253 |
+
if config.train.mode == 'uncond':
|
254 |
+
kwargs = dict()
|
255 |
+
elif config.train.mode == 'cond':
|
256 |
+
kwargs = dict(y=dataset.sample_label(_n_samples, device=device))
|
257 |
+
else:
|
258 |
+
raise NotImplementedError
|
259 |
+
return dpm_solver_sample(_n_samples, sample_steps, **kwargs)
|
260 |
+
|
261 |
+
|
262 |
+
with tempfile.TemporaryDirectory() as temp_path:
|
263 |
+
path = config.sample.path or temp_path
|
264 |
+
if accelerator.is_main_process:
|
265 |
+
os.makedirs(path, exist_ok=True)
|
266 |
+
utils.sample2dir(accelerator, path, n_samples, config.sample.mini_batch_size, sample_fn, dataset.unpreprocess)
|
267 |
+
|
268 |
+
_fid = 0
|
269 |
+
if accelerator.is_main_process:
|
270 |
+
_fid = calculate_fid_given_paths((dataset.fid_stat, path))
|
271 |
+
logging.info(f'step={train_state.step} fid{n_samples}={_fid}')
|
272 |
+
with open(os.path.join(config.workdir, 'eval.log'), 'a') as f:
|
273 |
+
print(f'step={train_state.step} fid{n_samples}={_fid}', file=f)
|
274 |
+
wandb.log({f'fid{n_samples}': _fid}, step=train_state.step)
|
275 |
+
_fid = torch.tensor(_fid, device=device)
|
276 |
+
_fid = accelerator.reduce(_fid, reduction='sum')
|
277 |
+
|
278 |
+
return _fid.item()
|
279 |
+
|
+    logging.info(f'Start fitting, step={train_state.step}, mixed_precision={config.mixed_precision}')
+
+    step_fid = []
+    while train_state.step < config.train.n_steps:
+        nnet.train()
+        batch = tree_map(lambda x: x.to(device), next(data_generator))
+        metrics = train_step(batch)
+
+        nnet.eval()
+        if accelerator.is_main_process and train_state.step % config.train.log_interval == 0:
+            logging.info(utils.dct2str(dict(step=train_state.step, **metrics)))
+            logging.info(config.workdir)
+            wandb.log(metrics, step=train_state.step)
+
+        if accelerator.is_main_process and train_state.step % config.train.eval_interval == 0:
+            torch.cuda.empty_cache()
+            logging.info('Save a grid of images...')
+            if config.train.mode == 'uncond':
+                if config.train.multi_modal:
+                    samples = dpm_solver_sample_multi_modal(_n_modalities=config.nnet.num_modalities, _n_samples=5 * 10, _sample_steps=50)
+                else:
+                    samples = dpm_solver_sample(_n_samples=5 * 10, _sample_steps=50)
+            elif config.train.mode == 'cond':
+                y = einops.repeat(torch.arange(5, device=device) % dataset.K, 'nrow -> (nrow ncol)', ncol=10)
+                samples = dpm_solver_sample(_n_samples=5 * 10, _sample_steps=50, y=y)
+            else:
+                raise NotImplementedError
+
+            if config.train.multi_modal:
+                samples = torch.stack([dataset.unpreprocess(sample) for sample in samples], dim=0)  # stack instead of cat
+                b = samples.shape[1]  # batch size
+                # Interleave samples from all modalities: for each sample index, take all
+                # modalities before moving on to the next sample.
+                samples = torch.stack([samples[j, i] for i in range(b) for j in range(config.nnet.num_modalities)]).view(-1, *samples.shape[2:])
+                # With 3 modalities we plot 9 columns (3 triplets per row), otherwise 10.
+                n_cols = 9 if config.nnet.num_modalities == 3 else 10
+                samples = make_grid(samples, n_cols)
+            else:
+                samples = make_grid(dataset.unpreprocess(samples), 10)
+            save_image(samples, os.path.join(config.sample_dir, f'{train_state.step}.png'))
+            wandb.log({'samples': wandb.Image(samples)}, step=train_state.step)
+            torch.cuda.empty_cache()
+        accelerator.wait_for_everyone()
+
+        if train_state.step % config.train.save_interval == 0 or train_state.step == config.train.n_steps:
+            torch.cuda.empty_cache()
+            logging.info(f'Save and eval checkpoint {train_state.step}...')
+            if accelerator.is_main_process:
+                try:
+                    train_state.save(os.path.join(config.ckpt_root, f'{train_state.step}.ckpt'))
+                except Exception as e:
+                    logging.error(f" ==> Failed to save checkpoint: {e}!!!")
+            accelerator.wait_for_everyone()
+            # TODO: FID evaluation during training is skipped for now
+            # fid = eval_step(n_samples=10000, sample_steps=50)  # calculate fid of the saved checkpoint
+            # step_fid.append((train_state.step, fid))
+            torch.cuda.empty_cache()
+        accelerator.wait_for_everyone()
+
+
+    logging.info(f'Finish fitting, step={train_state.step}')
+    logging.info(f'step_fid: {step_fid}')
+    if step_fid:  # guard: step_fid stays empty while in-training FID is skipped above
+        step_best = sorted(step_fid, key=lambda x: x[1])[0][0]
+        logging.info(f'step_best: {step_best}')
+        train_state.load(os.path.join(config.ckpt_root, f'{step_best}.ckpt'))
+    del metrics
+    accelerator.wait_for_everyone()
+    eval_step(n_samples=config.sample.n_samples, sample_steps=config.sample.sample_steps)
+
+
+from absl import flags
+from absl import app
+from ml_collections import config_flags
+import sys
+from pathlib import Path
+
+
+FLAGS = flags.FLAGS
+config_flags.DEFINE_config_file(
+    "config", None, "Training configuration.", lock_config=False)
+flags.mark_flags_as_required(["config"])
+flags.DEFINE_string("workdir", None, "Work unit directory.")
+
+
+def get_config_name():
+    argv = sys.argv
+    for i in range(1, len(argv)):
+        if argv[i].startswith('--config='):
+            return Path(argv[i].split('=')[-1]).stem
+
+def get_config_path():
+    argv = sys.argv
+    for i in range(1, len(argv)):
+        if argv[i].startswith('--config='):
+            path = argv[i].split('=')[-1]
+            if path.startswith('configs/'):
+                path = path[len('configs/'):]
+            return path
+
+def get_hparams():
+    argv = sys.argv
+    lst = []
+    for i in range(1, len(argv)):
+        assert '=' in argv[i]
+        if argv[i].startswith('--config.') and not argv[i].startswith('--config.dataset.path'):
+            hparam, val = argv[i].split('=')
+            hparam = hparam.split('.')[-1]
+            if hparam.endswith('path'):
+                val = Path(val).stem
+            lst.append(f'{hparam}={val}')
+    hparams = '-'.join(lst)
+    if hparams == '':
+        hparams = 'default'
+    return hparams
+
+
+def main(argv):
+    config = FLAGS.config
+    # config.config_name = get_config_name()
+    config.config_name = get_config_path().removesuffix('.py')  # removesuffix (Python >= 3.9): strip('.py') would remove characters, not the suffix
+    config.hparams = get_hparams()
+    config.workdir = FLAGS.workdir or os.path.join('workdir', config.config_name, config.hparams)
+    config.ckpt_root = os.path.join(config.workdir, 'ckpts')
+    config.sample_dir = os.path.join(config.workdir, 'samples')
+    train(config)
+
+
+if __name__ == "__main__":
+    app.run(main)
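The joint sampler above hinges on the pack/unpack round trip between per-modality latents and a single flat tensor. A minimal, self-contained sketch of the idea (toy shapes only; the real latent shape comes from config.z_shape):

import torch
import einops

# Toy sizes for illustration; the real values come from the autoencoder config.
n_modalities, B, C, H, W = 3, 2, 4, 8, 8
z = torch.randn(n_modalities, B, C, H, W)

# Pack: flatten each modality's latent and concatenate along dim=1 (mirrors combine_joint).
x = torch.cat([einops.rearrange(z_i, 'B C H W -> B (C H W)') for z_i in z], dim=1)
assert x.shape == (B, n_modalities * C * H * W)

# Unpack: split back into per-modality latents (mirrors split_joint above).
z_back = x.split([C * H * W] * n_modalities, dim=1)
z_back = [einops.rearrange(z_i, 'B (C H W) -> B C H W', C=C, H=H, W=W) for z_i in z_back]
assert all(torch.equal(a, b) for a, b in zip(z, z_back))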
src/COP-GEN-Beta/utils.py
ADDED
@@ -0,0 +1,240 @@
+import torch
+import torch.nn as nn
+import numpy as np
+import os
+from tqdm import tqdm
+from torchvision.utils import save_image
+from absl import logging
+from PIL import Image, ImageDraw, ImageFont
+
+def set_logger(log_level='info', fname=None):
+    import logging as _logging
+    handler = logging.get_absl_handler()
+    formatter = _logging.Formatter('%(asctime)s - %(filename)s - %(message)s')
+    handler.setFormatter(formatter)
+    logging.set_verbosity(log_level)
+    if fname is not None:
+        handler = _logging.FileHandler(fname)
+        handler.setFormatter(formatter)
+        logging.get_absl_logger().addHandler(handler)
+
+
+def dct2str(dct):
+    return str({k: f'{v:.6g}' for k, v in dct.items()})
+
+
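+# Factory for the denoising network, keyed by name; 'triffuser_multi_post_ln' is the
+# multi-modal Triffuser added in this commit, alongside the original UViT variants.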
+def get_nnet(name, **kwargs):
+    if name == 'uvit':
+        from libs.uvit import UViT
+        return UViT(**kwargs)
+    elif name == 'uvit_t2i':
+        from libs.uvit_t2i import UViT
+        return UViT(**kwargs)
+    elif name == 'uvit_multi_post_ln':
+        from libs.uvit_multi_post_ln import UViT
+        return UViT(**kwargs)
+    elif name == 'uvit_multi_post_ln_v1':
+        from libs.uvit_multi_post_ln_v1 import UViT
+        return UViT(**kwargs)
+    elif name == 'triffuser_multi_post_ln':
+        from libs.triffuser_multi_post_ln import Triffuser
+        return Triffuser(**kwargs)
+    else:
+        raise NotImplementedError(name)
+
+
+def set_seed(seed: int):
+    if seed is not None:
+        torch.manual_seed(seed)
+        np.random.seed(seed)
+
+
+def get_optimizer(params, name, **kwargs):
+    if name == 'adam':
+        from torch.optim import Adam
+        return Adam(params, **kwargs)
+    elif name == 'adamw':
+        from torch.optim import AdamW
+        return AdamW(params, **kwargs)
+    else:
+        raise NotImplementedError(name)
+
+
+def customized_lr_scheduler(optimizer, warmup_steps=-1):
+    from torch.optim.lr_scheduler import LambdaLR
+    def fn(step):
+        if warmup_steps > 0:
+            return min(step / warmup_steps, 1)
+        else:
+            return 1
+    return LambdaLR(optimizer, fn)
+
+
+def get_lr_scheduler(optimizer, name, **kwargs):
+    if name == 'customized':
+        return customized_lr_scheduler(optimizer, **kwargs)
+    elif name == 'cosine':
+        from torch.optim.lr_scheduler import CosineAnnealingLR
+        return CosineAnnealingLR(optimizer, **kwargs)
+    else:
+        raise NotImplementedError(name)
+
+
+def ema(model_dest: nn.Module, model_src: nn.Module, rate):
+    param_dict_src = dict(model_src.named_parameters())
+    for p_name, p_dest in model_dest.named_parameters():
+        p_src = param_dict_src[p_name]
+        assert p_src is not p_dest
+        p_dest.data.mul_(rate).add_((1 - rate) * p_src.data)
+
+
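+# TrainState bundles everything that gets checkpointed: optimizer, LR scheduler, step
+# counter, the network and its EMA copy. save()/load() write one .pth per component
+# inside a <step>.ckpt directory; resume() picks the latest such directory in ckpt_root.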
+class TrainState(object):
+    def __init__(self, optimizer, lr_scheduler, step, nnet=None, nnet_ema=None):
+        self.optimizer = optimizer
+        self.lr_scheduler = lr_scheduler
+        self.step = step
+        self.nnet = nnet
+        self.nnet_ema = nnet_ema
+
+    def ema_update(self, rate=0.9999):
+        if self.nnet_ema is not None:
+            ema(self.nnet_ema, self.nnet, rate)
+
+    def save(self, path):
+        os.makedirs(path, exist_ok=True)
+        torch.save(self.step, os.path.join(path, 'step.pth'))
+        for key, val in self.__dict__.items():
+            if key != 'step' and val is not None:
+                torch.save(val.state_dict(), os.path.join(path, f'{key}.pth'))
+
+    def load(self, path):
+        logging.info(f'load from {path}')
+        self.step = torch.load(os.path.join(path, 'step.pth'))
+        for key, val in self.__dict__.items():
+            if key != 'step' and val is not None:
+                val.load_state_dict(torch.load(os.path.join(path, f'{key}.pth'), map_location='cpu'))
+
+    def resume(self, ckpt_root, step=None):
+        if not os.path.exists(ckpt_root):
+            return
+        if step is None:
+            ckpts = list(filter(lambda x: '.ckpt' in x, os.listdir(ckpt_root)))
+            if not ckpts:
+                return
+            steps = map(lambda x: int(x.split(".")[0]), ckpts)
+            step = max(steps)
+        ckpt_path = os.path.join(ckpt_root, f'{step}.ckpt')
+        logging.info(f'resume from {ckpt_path}')
+        self.load(ckpt_path)
+
+    def to(self, device):
+        for key, val in self.__dict__.items():
+            if isinstance(val, nn.Module):
+                val.to(device)
+
+
+def cnt_params(model):
+    return sum(param.numel() for param in model.parameters())
+
+
+def initialize_train_state(config, device):
+    params = []
+
+    nnet = get_nnet(**config.nnet)
+    params += nnet.parameters()
+    nnet_ema = get_nnet(**config.nnet)
+    nnet_ema.eval()
+    logging.info(f'nnet has {cnt_params(nnet)} parameters')
+
+    optimizer = get_optimizer(params, **config.optimizer)
+    lr_scheduler = get_lr_scheduler(optimizer, **config.lr_scheduler)
+
+    train_state = TrainState(optimizer=optimizer, lr_scheduler=lr_scheduler, step=0,
+                             nnet=nnet, nnet_ema=nnet_ema)
+    train_state.ema_update(0)
+    train_state.to(device)
+    return train_state
+
+
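+# amortize splits a sample budget into full batches plus a remainder,
+# e.g. amortize(10, 4) -> [4, 4, 2]; sample2dir iterates over this schedule.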
+def amortize(n_samples, batch_size):
+    k = n_samples // batch_size
+    r = n_samples % batch_size
+    return k * [batch_size] if r == 0 else k * [batch_size] + [r]
+
+
+def sample2dir(accelerator, path, n_samples, mini_batch_size, sample_fn, unpreprocess_fn=None):
+    os.makedirs(path, exist_ok=True)
+    idx = 0
+    batch_size = mini_batch_size * accelerator.num_processes
+
+    for _batch_size in tqdm(amortize(n_samples, batch_size), disable=not accelerator.is_main_process, desc='sample2dir'):
+        samples = unpreprocess_fn(sample_fn(mini_batch_size))
+        samples = accelerator.gather(samples.contiguous())[:_batch_size]
+        if accelerator.is_main_process:
+            for sample in samples:
+                save_image(sample, os.path.join(path, f"{idx}.png"))
+                idx += 1
+
+
+def grad_norm(model):
+    total_norm = 0.
+    for p in model.parameters():
+        param_norm = p.grad.data.norm(2)
+        total_norm += param_norm.item() ** 2
+    total_norm = total_norm ** (1. / 2)
+    return total_norm
+
+
+
+def center_crop(width, height, img):
+    resample = {'box': Image.BOX, 'lanczos': Image.LANCZOS}['lanczos']
+    crop = np.min(img.shape[:2])
+    img = img[(img.shape[0] - crop) // 2: (img.shape[0] + crop) // 2,
+              (img.shape[1] - crop) // 2: (img.shape[1] + crop) // 2]  # center crop
+    try:
+        img = Image.fromarray(img, 'RGB')
+    except Exception:
+        img = Image.fromarray(img)
+    img = img.resize((width, height), resample)  # resize the center crop from [crop, crop] to [width, height]
+
+    return np.array(img).astype(np.uint8)
+
+
+def drawRoundRec(draw, color, x, y, w, h, r):
+    drawObject = draw
+
+    # rounded corners
+    drawObject.ellipse((x, y, x + r, y + r), fill=color)
+    drawObject.ellipse((x + w - r, y, x + w, y + r), fill=color)
+    drawObject.ellipse((x, y + h - r, x + r, y + h), fill=color)
+    drawObject.ellipse((x + w - r, y + h - r, x + w, y + h), fill=color)
+
+    # connecting rectangles
+    drawObject.rectangle((x + r / 2, y, x + w - (r / 2), y + h), fill=color)
+    drawObject.rectangle((x, y + r / 2, x + w, y + h - (r / 2)), fill=color)
+
+
+def add_water(img, text='UniDiffuser', pos=3):
+    width, height = img.size
+    scale = 4
+    scale_size = 0.5
+    img = img.resize((width * scale, height * scale), Image.LANCZOS)
+    result = Image.new(img.mode, (width * scale, height * scale), color=(255, 255, 255))
+    result.paste(img, box=(0, 0))
+
+    delta_w = int(width * scale * 0.27 * scale_size)  # text width
+    delta_h = width * scale * 0.05 * scale_size  # text height
+    postions = np.array([[0, 0], [0, height * scale - delta_h], [width * scale - delta_w, 0],
+                         [width * scale - delta_w, height * scale - delta_h]])
+    postion = postions[pos]
+    # watermark text
+    draw = ImageDraw.Draw(result)
+    fillColor = (107, 92, 231)
+    setFont = ImageFont.truetype("assets/ArialBoldMT.ttf", int(width * scale * 0.05 * scale_size))
+    delta = 20 * scale_size
+    padding = 15 * scale_size
+    drawRoundRec(draw, (223, 230, 233), postion[0] - delta - padding, postion[1] - delta - padding,
+                 w=delta_w + 2 * padding, h=delta_h + 2 * padding, r=50 * scale_size)
+    draw.text((postion[0] - delta, postion[1] - delta), text, font=setFont, fill=fillColor)
+
+    return result.resize((width, height), Image.LANCZOS)
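For reference, a minimal sketch of the EMA rule implemented by ema / TrainState.ema_update above (toy linear layers; the 0.9999 rate is the default used during training):

import torch
import torch.nn as nn
from utils import ema  # the helper defined above

src = nn.Linear(4, 4)                 # online network being optimised
dest = nn.Linear(4, 4)                # EMA copy
dest.load_state_dict(src.state_dict())

with torch.no_grad():
    for p in src.parameters():
        p.add_(0.1)                   # pretend one optimiser step moved the online weights

ema(dest, src, rate=0.9999)           # dest <- 0.9999 * dest + 0.0001 * src, parameter-wise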