inoculatemedia committed
Commit e950119 · verified · 1 Parent(s): b7ae1f6

Upload 23 files

.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ demo/I0_0.png filter=lfs diff=lfs merge=lfs -text
+ demo/I0_1.png filter=lfs diff=lfs merge=lfs -text
+ demo/I0_slomo_clipped.gif filter=lfs diff=lfs merge=lfs -text
+ demo/i0.png filter=lfs diff=lfs merge=lfs -text
+ demo/i1.png filter=lfs diff=lfs merge=lfs -text
+ demo/I2_0.png filter=lfs diff=lfs merge=lfs -text
+ demo/I2_1.png filter=lfs diff=lfs merge=lfs -text
+ demo/I2_slomo_clipped.gif filter=lfs diff=lfs merge=lfs -text
Colab_demo.ipynb ADDED
@@ -0,0 +1,127 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Untitled0.ipynb",
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "<a href=\"https://colab.research.google.com/github/hzwer/Practical-RIFE/blob/main/Colab_demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "FypCcZkNNt2p"
+ },
+ "source": [
+ "%cd /content\n",
+ "!git clone https://github.com/hzwer/Practical-RIFE"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "1wysVHxoN54f"
+ },
+ "source": [
+ "!gdown --id 1O5KfS3KzZCY3imeCr2LCsntLhutKuAqj\n",
+ "!7z e Practical-RIFE/RIFE_trained_model_v3.8.zip"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "AhbHfRBJRAUt"
+ },
+ "source": [
+ "!mkdir /content/Practical-RIFE/train_log\n",
+ "!mv *.py /content/Practical-RIFE/train_log/\n",
+ "!mv *.pkl /content/Practical-RIFE/train_log/\n",
+ "%cd /content/Practical-RIFE/\n",
+ "!gdown --id 1i3xlKb7ax7Y70khcTcuePi6E7crO_dFc\n",
+ "!pip3 install -r requirements.txt"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rirngW5uRMdg"
+ },
+ "source": [
+ "Please upload your video to content/Practical-RIFE/video.mp4, or use our demo video."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dnLn4aHHPzN3"
+ },
+ "source": [
+ "!nvidia-smi\n",
+ "!python3 inference_video.py --exp=1 --video=demo.mp4 --montage --skip"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "77KK6lxHgJhf"
+ },
+ "source": [
+ "Our demo.mp4 is 25FPS. You can adjust the parameters for your own preference.\n",
+ "For example: \n",
+ "--fps=60 --exp=1 --video=mydemo.avi --png"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "0zIBbVE3UfUD",
+ "cellView": "code"
+ },
+ "source": [
+ "from IPython.display import display, Image\n",
+ "import moviepy.editor as mpy\n",
+ "display(mpy.ipython_display('demo_4X_100fps.mp4', height=256, max_duration=100.))"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "tWkJCNgP3zXA"
+ },
+ "source": [
+ "!python3 inference_img.py --img demo/I0_0.png demo/I0_1.png\n",
+ "ffmpeg -r 10 -f image2 -i output/img%d.png -s 448x256 -vf \"split[s0][s1];[s0]palettegen=stats_mode=single[p];[s1][p]paletteuse=new=1\" output/slomo.gif\n",
+ "# Image interpolation"
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2021 hzwer
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,129 @@
- ---
- title: Zerogpu Upscaler Interpolation
- emoji: 📚
- colorFrom: gray
- colorTo: green
- sdk: gradio
- sdk_version: 5.38.0
- app_file: app.py
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Practical-RIFE
+ **[V4.0 Anime Demo Video](https://www.bilibili.com/video/BV1J3411t7qT?p=1&share_medium=iphone&share_plat=ios&share_session_id=7AE3DA72-D05C-43A0-9838-E2A80885BD4E&share_source=QQ&share_tag=s_i&timestamp=1639643780&unique_k=rjqO0EK)** | **[Iteration Experience (Chinese)](https://zhuanlan.zhihu.com/p/721430631)** | **[Iteration Q&A](https://github.com/hzwer/Practical-RIFE/issues/124)** | **[Colab](https://colab.research.google.com/drive/1BZmGSq15O4ZU5vPfzkv7jFNYahTm6qwT?usp=sharing)**
+
+ This project is based on [RIFE](https://github.com/hzwer/arXiv2020-RIFE) and [SAFA](https://github.com/megvii-research/WACV2024-SAFA). We aim to enhance their practicality for users by incorporating various features and designing new models. Since improving the PSNR index is not consistent with subjective perception, this project is intended for engineers and developers. For general users, we recommend the following software:
+
+ **[SVFI (Chinese)](https://github.com/YiWeiHuang-stack/Squirrel-Video-Frame-Interpolation) | [RIFE-App](https://grisk.itch.io/rife-app) | [FlowFrames](https://nmkd.itch.io/flowframes)**
+
+ Thanks to the [SVFI team](https://github.com/Justin62628/Squirrel-RIFE) for supporting model testing on animation.
+
+ [VapourSynth-RIFE](https://github.com/HolyWu/vs-rife) | [RIFE-ncnn-vulkan](https://github.com/nihui/rife-ncnn-vulkan) | [VapourSynth-RIFE-ncnn-Vulkan](https://github.com/styler00dollar/VapourSynth-RIFE-ncnn-Vulkan) | [vs-mlrt](https://github.com/AmusementClub/vs-mlrt) | [Drop frame fixer and FPS converter](https://github.com/may-son/RIFE-FixDropFrames-and-ConvertFPS)
+
+ ## Frame Interpolation
+ 2024.08 - We find that 4.24+ is quite suitable for post-processing of [some diffusion model generated videos](https://drive.google.com/drive/folders/1hSzUn10Era3JCaVz0Z5Eg4wT9R6eJ9U9?usp=sharing).
+
+ ### Trained Model
+ The content of these links is under the same MIT license as this project. **lite** means a model trained with a similar framework but at a lower computational cost.
+ Currently, it is recommended to choose 4.25 by default for most scenes.
+
+ 4.26 - 2024.09.21 | [Google Drive](https://drive.google.com/file/d/1gViYvvQrtETBgU1w8axZSsr7YUuw31uy/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/1EZsG3IFO8C1e2uRVb_Npgg?pwd=smw8) || [4.25.lite - 2024.10.20](https://drive.google.com/file/d/1zlKblGuKNatulJNFf5jdB-emp9AqGK05/view?usp=share_link)
+
+ 4.25 - 2024.09.19 | [Google Drive](https://drive.google.com/file/d/1ZKjcbmt1hypiFprJPIKW0Tt0lr_2i7bg/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/1rpUX5uawusz2uwEdXtjRbw?pwd=mo6k) | I am trying to use more flow blocks, so the scale_list will change accordingly. It seems that anime scenes have been significantly improved.
+
+ 4.22 - 2024.08.08 | [Google Drive](https://drive.google.com/file/d/1qh2DSA9a1eZUTtZG9U9RQKO7N7OaUJ0_/view?usp=share_link) [Baidu Netdisk](https://pan.baidu.com/s/1EA5BIHqOu35Rj4meg00G4g?pwd=hwym) || [4.22.lite](https://drive.google.com/file/d/1Smy6gY7BkS_RzCjPCbMEy-TsX8Ma5B0R/view?usp=sharing) || 4.21 - 2024.08.04 | [Google Drive](https://drive.google.com/file/d/1l5u6G8vEkPAT7cYYWwzB6OG8vwBYrxiS/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/1TMjRFOwdLgsShKdGbTKW_g?pwd=4q6d)
+
+ 4.20 - 2024.07.24 | [Google Drive](https://drive.google.com/file/d/11n3YR7-qCRZm9RDdwtqOTsgCJUHPuexA/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/1v0b7ZTSj_VvLOfW-hQ_NZQ?pwd=ykkv)
+ || 4.18 - 2024.07.03 | [Google Drive](https://drive.google.com/file/d/1octn-UVuEjXa_HlsIUbNeLTTvYCKbC_s/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/1fqtxJyXSgUx-gE3rieuKxg?pwd=udr1)
+
+ 4.17 - 2024.05.24 | [Google Drive](https://drive.google.com/file/d/1962p_lEWo_kLTEynarNaRYRNVdaiQG2k/view?usp=share_link) [Baidu Netdisk](https://pan.baidu.com/s/1bMzTYoJKZXsoxuSBmzj6VQ?pwd=as37) : Add gram loss from [FILM](https://github.com/google-research/frame-interpolation/blob/69f8708f08e62c2edf46a27616a4bfcf083e2076/losses/vgg19_loss.py) || [4.17.lite](https://drive.google.com/file/d/1e9Qb4rm20UAsO7h9VILDwrpvTSHWWW8b/view?usp=share_link)
+
+ 4.15 - 2024.03.11 | [Google Drive](https://drive.google.com/file/d/1xlem7cfKoMaiLzjoeum8KIQTYO-9iqG5/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/1IGNIX7JXGUwI_tfoafYHqA?pwd=bg0b) || [4.15.lite](https://drive.google.com/file/d/1BoOF-qSEnTPDjpKG1sBTa6k7Sv5_-k7z/view?usp=sharing) || 4.14 - 2024.01.08 | [Google Drive](https://drive.google.com/file/d/1BjuEY7CHZv1wzmwXSQP9ZTj0mLWu_4xy/view?usp=share_link) [Baidu Netdisk](https://pan.baidu.com/s/1d-W64lRsJTqNsgWoXYiaWQ?pwd=xawa) || [4.14.lite](https://drive.google.com/file/d/1eULia_onOtRXHMAW9VeDL8N2_7z8J1ba/view?usp=share_link)
+
+ v4.9.2 - 2023.11.01 | [Google Drive](https://drive.google.com/file/d/1UssCvbL8N-ty0xIKM5G5ZTEgp9o4w3hp/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/18cbx3EP4HWgSa1vkcXvvyw?pwd=swr9) || v4.3 - 2022.8.17 | [Google Drive](https://drive.google.com/file/d/1xrNofTGMHdt9sQv7-EOG0EChl8hZW_cU/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/12AUAeZLZf5E1_Zx6WkS3xw?pwd=q83a)
+
+ v3.8 - 2021.6.17 | [Google Drive](https://drive.google.com/file/d/1O5KfS3KzZCY3imeCr2LCsntLhutKuAqj/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/1X-jpWBZWe-IQBoNAsxo2mA?pwd=kxr3) || v3.1 - 2021.5.17 | [Google Drive](https://drive.google.com/file/d/1xn4R3TQyFhtMXN2pa3lRB8cd4E1zckQe/view?usp=sharing) [Baidu Netdisk](https://pan.baidu.com/s/1W4p_Ni04HLI_jTy45sVodA?pwd=64bz)
+
+ [More older versions](https://github.com/megvii-research/ECCV2022-RIFE/issues/41)
+
+ ### Installation
+ python <= 3.11
+ ```
+ git clone git@github.com:hzwer/Practical-RIFE.git
+ cd Practical-RIFE
+ pip3 install -r requirements.txt
+ ```
+ Download a model from the model list and put *.py and flownet.pkl into train_log/
+ ### Run
+
+ You can use our [demo video](https://drive.google.com/file/d/1i3xlKb7ax7Y70khcTcuePi6E7crO_dFc/view?usp=sharing) or your own video.
+ ```
+ python3 inference_video.py --multi=2 --video=video.mp4
+ ```
+ (generates video_2X_xxfps.mp4)
+ ```
+ python3 inference_video.py --multi=4 --video=video.mp4
+ ```
+ (for 4X interpolation)
+ ```
+ python3 inference_video.py --multi=2 --video=video.mp4 --scale=0.5
+ ```
+ (If your video has a high resolution, such as 4K, we recommend setting --scale=0.5 (default 1.0).)
+ ```
+ python3 inference_video.py --multi=4 --img=input/
+ ```
+ (to read the video from PNGs, like input/0.png ... input/612.png; make sure the PNG names are numbers)
+
+ Parameter descriptions:
+
+ --img / --video: The input file address
+
+ --output: Output video name 'xxx.mp4'
+
+ --model: Directory with trained model files
+
+ --UHD: It is equivalent to setting scale=0.5
+
+ --montage: Splice the generated video with the original video, like [this demo](https://www.youtube.com/watch?v=kUQ7KK6MhHw)
+
+ --fps: Set output FPS manually
+
+ --ext: Set output video format, default: mp4
+
+ --multi: Interpolation frame-rate multiplier
+
+ --exp: Set --multi to 2^(--exp)
+
+ --skip: No longer useful; refer to [issue 207](https://github.com/hzwer/ECCV2022-RIFE/issues/207)
+
+
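To make the flag interactions above concrete, here is a small hypothetical helper (not part of the repository) that builds an inference_video.py command line; the only behavior it assumes is the documented rule that --exp overrides --multi with 2^exp:

```python
# Sketch: compose an inference_video.py invocation from the flags described above.
def build_command(video, exp=None, multi=2, scale=1.0, fps=None):
    if exp is not None:
        multi = 2 ** exp  # --exp sets --multi to 2^exp, as documented
    parts = ["python3", "inference_video.py", f"--multi={multi}", f"--video={video}"]
    if scale != 1.0:
        parts.append(f"--scale={scale}")  # e.g. 0.5 for 4K input
    if fps is not None:
        parts.append(f"--fps={fps}")      # otherwise output FPS = input FPS * multi
    return " ".join(parts)

print(build_command("video.mp4", exp=2, scale=0.5))
# python3 inference_video.py --multi=4 --video=video.mp4 --scale=0.5
```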
+ ### Model training
+ The whole repo can be downloaded from [v4.0](https://drive.google.com/file/d/1zoSz7b8c6kUsnd4gYZ_6TrKxa7ghHJWW/view?usp=sharing), [v4.12](https://drive.google.com/file/d/1IHB35zhO4rr-JSMnpRvHhU9U65Z4giWv/view?usp=sharing), [v4.15](https://drive.google.com/file/d/19sUMZ-6H7g_hYDjTcqxYu9kE7TqnfS3k/view?usp=sharing), [v4.18](https://drive.google.com/file/d/1g8D2foww7DhGLIxtaDLr9fU3y-ByOw4B/view?usp=share_link), [v4.25](https://drive.google.com/file/d/1_l4OgBp3GrrHOcQB87xXCI7OtTzyeXZL/view?usp=share_link). However, we currently do not have the time to organize them well; they are for reference only.
+
+ ## Video Enhancement
+
+ <img width="710" alt="image" src="https://github.com/hzwer/Practical-RIFE/assets/10103856/5bae134c-0747-4084-bbab-37b1595352f1">
+
+ We are developing a practical model of [SAFA](https://github.com/megvii-research/WACV2024-SAFA). You are welcome to check its [demo](https://www.youtube.com/watch?v=QII2KQSBBwk) ([BiliBili](https://www.bilibili.com/video/BV1Up4y1d7kF/)) and give feedback.
+
+ v0.5 - 2023.12.26 | [Google Drive](https://drive.google.com/file/d/1OLO9hLV97ZQ4uRV2-aQqgnwhbKMMt6TX/view?usp=sharing)
+
+ ```
+ python3 inference_video_enhance.py --video=demo.mp4
+ ```
+
+ ## Citation
+
+ ```
+ @inproceedings{huang2022rife,
+   title={Real-Time Intermediate Flow Estimation for Video Frame Interpolation},
+   author={Huang, Zhewei and Zhang, Tianyuan and Heng, Wen and Shi, Boxin and Zhou, Shuchang},
+   booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+   year={2022}
+ }
+ ```
+ ```
+ @inproceedings{huang2024safa,
+   title={Scale-Adaptive Feature Aggregation for Efficient Space-Time Video Super-Resolution},
+   author={Huang, Zhewei and Huang, Ailin and Hu, Xiaotao and Hu, Chen and Xu, Jun and Zhou, Shuchang},
+   booktitle={Winter Conference on Applications of Computer Vision (WACV)},
+   year={2024}
+ }
+ ```
+
+ ## Reference
+
+ Optical Flow:
+ [ARFlow](https://github.com/lliuz/ARFlow) [pytorch-liteflownet](https://github.com/sniklaus/pytorch-liteflownet) [RAFT](https://github.com/princeton-vl/RAFT) [pytorch-PWCNet](https://github.com/sniklaus/pytorch-pwc)
+
+ Video Interpolation:
+ [DVF](https://github.com/lxx1991/pytorch-voxel-flow) [TOflow](https://github.com/Coldog2333/pytoflow) [SepConv](https://github.com/sniklaus/sepconv-slomo) [DAIN](https://github.com/baowenbo/DAIN) [CAIN](https://github.com/myungsub/CAIN) [MEMC-Net](https://github.com/baowenbo/MEMC-Net) [SoftSplat](https://github.com/sniklaus/softmax-splatting) [BMBC](https://github.com/JunHeum/BMBC) [EDSC](https://github.com/Xianhang/EDSC-pytorch) [EQVI](https://github.com/lyh-18/EQVI) [RIFE](https://github.com/hzwer/arXiv2020-RIFE)
__pycache__/video_processing.cpython-312.pyc ADDED
Binary file (12.1 kB).
 
app.py ADDED
@@ -0,0 +1,294 @@
+ import gradio as gr
+ import numpy as np
+ from PIL import Image, ImageFilter
+ import cv2
+ import os
+ import torch
+ import torch.nn.functional as F
+ from torchvision import transforms
+ import warnings
+ from video_processing import process_video
+
+ warnings.filterwarnings("ignore")
+
+ # ZeroGPU decorator (if available)
+ try:
+     import spaces
+     HAS_ZEROGPU = True
+ except ImportError:
+     HAS_ZEROGPU = False
+     # Create a dummy decorator if spaces is not available
+     def spaces_gpu(func):
+         return func
+     spaces = type('spaces', (), {'GPU': spaces_gpu})()
+
+ # VAAPI acceleration check
+ def check_vaapi_support():
+     """Check if VAAPI is available for hardware acceleration"""
+     try:
+         # Check if VAAPI devices are available
+         vaapi_devices = [f for f in os.listdir('/dev/dri') if f.startswith('render')]
+         return len(vaapi_devices) > 0
+     except:
+         return False
+
+ HAS_VAAPI = check_vaapi_support()
+
+ class TorchUpscaler:
+     """PyTorch-based upscaler that can use GPU acceleration"""
+
+     def __init__(self, device='auto'):
+         if device == 'auto':
+             self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+         else:
+             self.device = torch.device(device)
+
+         print(f"Using device: {self.device}")
+
+     def bicubic_torch(self, image_tensor, scale_factor):
+         """GPU-accelerated bicubic upscaling using PyTorch"""
+         return F.interpolate(
+             image_tensor,
+             scale_factor=scale_factor,
+             mode='bicubic',
+             align_corners=False,
+             antialias=True
+         )
+
+     def lanczos_torch(self, image_tensor, scale_factor):
+         """GPU-accelerated Lanczos-style upscaling"""
+         return F.interpolate(
+             image_tensor,
+             scale_factor=scale_factor,
+             mode='bicubic',
+             align_corners=False,
+             antialias=True
+         )
+
+     def esrgan_style_upscale(self, image_tensor, scale_factor):
+         """Simple ESRGAN-style upscaling using convolutions"""
+         b, c, h, w = image_tensor.shape
+         upscaled = F.interpolate(image_tensor, scale_factor=scale_factor, mode='bicubic', align_corners=False)
+         kernel = torch.tensor([[[[-1, -1, -1],
+                                  [-1, 9, -1],
+                                  [-1, -1, -1]]]], dtype=torch.float32, device=self.device)
+         kernel = kernel.repeat(c, 1, 1, 1)
+         sharpened = F.conv2d(upscaled, kernel, padding=1, groups=c)
+         result = 0.8 * upscaled + 0.2 * sharpened
+         return torch.clamp(result, 0, 1)
+
+ class VAAPIUpscaler:
+     """VAAPI hardware-accelerated upscaler"""
+
+     def __init__(self):
+         self.vaapi_available = HAS_VAAPI
+         if self.vaapi_available:
+             print("VAAPI hardware acceleration available")
+         else:
+             print("VAAPI hardware acceleration not available")
+
+     def upscale_vaapi(self, image_array, scale_factor, method):
+         """Use VAAPI for hardware-accelerated upscaling"""
+         if not self.vaapi_available:
+             return None
+         try:
+             h, w = image_array.shape[:2]
+             new_h, new_w = int(h * scale_factor), int(w * scale_factor)
+             if method == "VAAPI_BICUBIC":
+                 return cv2.resize(image_array, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
+             elif method == "VAAPI_LANCZOS":
+                 return cv2.resize(image_array, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)
+         except Exception as e:
+             print(f"VAAPI upscaling failed: {e}")
+         return None
+
+ torch_upscaler = TorchUpscaler()
+ vaapi_upscaler = VAAPIUpscaler()
+
+ @spaces.GPU if HAS_ZEROGPU else lambda x: x
+ def upscale_image_accelerated(image, scale_factor, method, enhance_quality, use_gpu_acceleration):
+     if image is None:
+         return None
+
+     original_width, original_height = image.size
+     new_width = int(original_width * scale_factor)
+     new_height = int(original_height * scale_factor)
+
+     try:
+         if use_gpu_acceleration and torch.cuda.is_available():
+             print("Using GPU acceleration")
+             transform = transforms.Compose([transforms.ToTensor()])
+             image_tensor = transform(image).unsqueeze(0).to(torch_upscaler.device)
+
+             if method == "GPU_Bicubic":
+                 upscaled_tensor = torch_upscaler.bicubic_torch(image_tensor, scale_factor)
+             elif method == "GPU_Lanczos":
+                 upscaled_tensor = torch_upscaler.lanczos_torch(image_tensor, scale_factor)
+             elif method == "GPU_ESRGAN_Style":
+                 upscaled_tensor = torch_upscaler.esrgan_style_upscale(image_tensor, scale_factor)
+             else:
+                 upscaled_tensor = torch_upscaler.bicubic_torch(image_tensor, scale_factor)
+
+             upscaled = transforms.ToPILImage()(upscaled_tensor.squeeze(0).cpu())
+
+         elif method.startswith("VAAPI_") and HAS_VAAPI:
+             print("Using VAAPI acceleration")
+             img_array = np.array(image)
+             upscaled_array = vaapi_upscaler.upscale_vaapi(img_array, scale_factor, method)
+             upscaled = Image.fromarray(upscaled_array) if upscaled_array is not None else image.resize((new_width, new_height), Image.BICUBIC)
+
+         else:
+             print("Using CPU methods")
+             if method == "Bicubic":
+                 upscaled = image.resize((new_width, new_height), Image.BICUBIC)
+             elif method == "Lanczos":
+                 upscaled = image.resize((new_width, new_height), Image.LANCZOS)
+             else:
+                 upscaled = image.resize((new_width, new_height), Image.BICUBIC)
+
+         if enhance_quality:
+             upscaled = upscaled.filter(ImageFilter.UnsharpMask(radius=1, percent=120, threshold=3))
+
+         return upscaled
+
+     except Exception as e:
+         print(f"Error during upscaling: {e}")
+         return image
+
+ def get_available_methods():
+     methods = ["Bicubic", "Lanczos"]
+     if torch.cuda.is_available():
+         methods.extend(["GPU_Bicubic", "GPU_Lanczos", "GPU_ESRGAN_Style"])
+     if HAS_VAAPI:
+         methods.extend(["VAAPI_BICUBIC", "VAAPI_LANCZOS"])
+     return methods
+
+ def get_system_info():
+     info = []
+     if torch.cuda.is_available():
+         gpu_name = torch.cuda.get_device_name(0)
+         gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
+         info.append(f"🚀 CUDA GPU: {gpu_name} ({gpu_memory:.1f} GB)")
+     else:
+         info.append("❌ CUDA not available")
+     if HAS_ZEROGPU:
+         info.append("✅ ZeroGPU support enabled")
+     if HAS_VAAPI:
+         info.append("✅ VAAPI hardware acceleration available")
+     return "\n".join(info)
+
+ def process_and_info_accelerated(image, scale_factor, method, enhance_quality, use_gpu_acceleration):
+     if image is None:
+         return None, "Please upload an image first"
+
+     original_info = f"Original: {image.size[0]} × {image.size[1]} pixels"
+     result = upscale_image_accelerated(image, scale_factor, method, enhance_quality, use_gpu_acceleration)
+     if result is None:
+         return None, "Error processing image"
+
+     result_info = f"Upscaled: {result.size[0]} × {result.size[1]} pixels"
+     accel_info = "GPU/Hardware" if use_gpu_acceleration else "CPU"
+
+     combined_info = f"""
+ ## Processing Details
+ {original_info}
+ {result_info}
+ **Scale Factor:** {scale_factor}x
+ **Method:** {method}
+ **Acceleration:** {accel_info}
+ **Quality Enhancement:** {'✅' if enhance_quality else '❌'}
+
+ ## System Status
+ {get_system_info()}
+ """
+     return result, combined_info
+
+ def create_accelerated_upscaler_ui():
+     available_methods = get_available_methods()
+
+     gr.Markdown("## 🚀 Accelerated Image Upscaler")
+     with gr.Row():
+         with gr.Column(scale=1):
+             input_image = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
+             scale_factor = gr.Slider(minimum=1.5, maximum=4.0, step=0.5, value=2.0, label="Scale Factor")
+             method = gr.Dropdown(choices=available_methods, value=available_methods[0], label="Upscaling Method")
+             use_gpu_acceleration = gr.Checkbox(label="Use GPU Acceleration", value=torch.cuda.is_available())
+             enhance_quality = gr.Checkbox(label="Apply Quality Enhancement", value=True)
+             process_btn = gr.Button("🚀 Upscale Image", variant="primary")
+
+         with gr.Column(scale=2):
+             output_image = gr.Image(label="Upscaled Image", type="pil")
+             image_info = gr.Markdown(value=f"## System Status\n{get_system_info()}", label="Processing Information")
+
+     process_btn.click(
+         fn=process_and_info_accelerated,
+         inputs=[input_image, scale_factor, method, enhance_quality, use_gpu_acceleration],
+         outputs=[output_image, image_info]
+     )
+
+ def create_video_interface_ui():
+     gr.Markdown("## 🚀 Video Upscaler and Frame Interpolator")
+     with gr.Row():
+         with gr.Column(scale=1):
+             input_video = gr.Video(label="Upload Video", sources=["upload"])
+             scale_factor = gr.Slider(minimum=1.5, maximum=4.0, step=0.5, value=2.0, label="Scale Factor")
+             multi = gr.Slider(minimum=2, maximum=8, step=1, value=2, label="Frame Multiplier")
+             use_gpu_acceleration = gr.Checkbox(label="Use GPU Acceleration", value=torch.cuda.is_available())
+             process_btn = gr.Button("🚀 Process Video", variant="primary")
+
+         with gr.Column(scale=2):
+             output_video = gr.Video(label="Processed Video")
+             processing_info = gr.Markdown(value=f"## System Status\n{get_system_info()}", label="Processing Information")
+
+     process_btn.click(
+         fn=process_video_wrapper,
+         inputs=[input_video, scale_factor, multi, use_gpu_acceleration],
+         outputs=[output_video, processing_info]
+     )
+
+ def process_video_wrapper(video_path, scale_factor, multi, use_gpu):
+     if video_path is None:
+         return None, "Please upload a video first"
+
+     output_path = "temp_output.mp4"
+     modelDir = 'rife/train_log'
+
+     processed_video_path = process_video(
+         video=video_path,
+         output=output_path,
+         modelDir=modelDir,
+         fp16=use_gpu,
+         UHD=False,
+         scale=scale_factor,
+         skip=False,
+         fps=None,
+         png=False,
+         ext='mp4',
+         exp=1,
+         multi=multi
+     )
+
+     info = f"""
+ ## Processing Details
+ **Scale Factor:** {scale_factor}x
+ **Frame Multiplier:** {multi}x
+ **Acceleration:** {'GPU' if use_gpu else 'CPU'}
+
+ ## System Status
+ {get_system_info()}
+ """
+     return processed_video_path, info
+
+ with gr.Blocks(title="Accelerated Media Processor", theme=gr.themes.Soft()) as demo:
+     with gr.Tab("Image Upscaler"):
+         create_accelerated_upscaler_ui()
+     with gr.Tab("Video Processing"):
+         create_video_interface_ui()
+
+ if __name__ == "__main__":
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False,
+         debug=True
+     )
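For reference, the upscaling entry point in app.py can also be exercised without the Gradio UI. A minimal sketch, assuming the module imports cleanly in the Space environment (it pulls in video_processing at import time) and that a hypothetical input.png exists locally:

```python
# Sketch: call app.py's upscaler directly (no web UI), under the assumptions above.
from PIL import Image
import app  # the module added in this commit

img = Image.open("input.png").convert("RGB")  # hypothetical input file
methods = app.get_available_methods()
out = app.upscale_image_accelerated(
    img,
    scale_factor=2.0,
    method="GPU_Bicubic" if "GPU_Bicubic" in methods else "Bicubic",
    enhance_quality=True,
    use_gpu_acceleration=True,  # the function falls back to CPU paths when CUDA is absent
)
out.save("output_2x.png")
```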
demo/I0_0.png ADDED

Git LFS Details

  • SHA256: dffc62ff6436b3d1c02aec36a8b3c65603bd9d83e0001e942c4b99d1e509a6c3
  • Pointer size: 131 Bytes
  • Size of remote file: 180 kB
demo/I0_1.png ADDED

Git LFS Details

  • SHA256: fcf4eb07e7c63de8f1508fb93854ce464dd4bc9fd6e85be0e5922b9f700c7c6a
  • Pointer size: 131 Bytes
  • Size of remote file: 183 kB
demo/I0_slomo_clipped.gif ADDED

Git LFS Details

  • SHA256: 5ad98421883a509d66916a8cf87fcd1a4268f23ab85cf09910e5709a466f9aa9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.11 MB
demo/I2_0.png ADDED

Git LFS Details

  • SHA256: d8598c23c159508b348ecd7cbab5bf2a00530f6bd3316bf3ac5c09ddec4c14c1
  • Pointer size: 131 Bytes
  • Size of remote file: 163 kB
demo/I2_1.png ADDED

Git LFS Details

  • SHA256: cc0c17b74bff42d21793ede192a7ec028df569a9bc466c5b17ab0cbaf76d53fc
  • Pointer size: 131 Bytes
  • Size of remote file: 163 kB
demo/I2_slomo_clipped.gif ADDED

Git LFS Details

  • SHA256: a15a9a33ce9d87173ea7a4e9c4722b14f81a52633292a1e4e7babeac62fbc623
  • Pointer size: 131 Bytes
  • Size of remote file: 967 kB
demo/i0.png ADDED

Git LFS Details

  • SHA256: a01f79bf3c485c6d59284f36c9ec7933598313ae847b0394acbc0c4573491687
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
demo/i1.png ADDED

Git LFS Details

  • SHA256: 7ba4a0f8eae5a62ce567ef2611349e56907e29b968b9da5d2e57a8f4525d51ea
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
inference_img.py ADDED
@@ -0,0 +1,118 @@
+ import os
+ import cv2
+ import torch
+ import argparse
+ from torch.nn import functional as F
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ torch.set_grad_enabled(False)
+ if torch.cuda.is_available():
+     torch.backends.cudnn.enabled = True
+     torch.backends.cudnn.benchmark = True
+
+ parser = argparse.ArgumentParser(description='Interpolation for a pair of images')
+ parser.add_argument('--img', dest='img', nargs=2, required=True)
+ parser.add_argument('--exp', default=4, type=int)
+ parser.add_argument('--ratio', default=0, type=float, help='inference ratio between two images with 0 - 1 range')
+ parser.add_argument('--rthreshold', default=0.02, type=float, help='returns image when actual ratio falls in given range threshold')
+ parser.add_argument('--rmaxcycles', default=8, type=int, help='limit max number of bisectional cycles')
+ parser.add_argument('--model', dest='modelDir', type=str, default='train_log', help='directory with trained model files')
+
+ args = parser.parse_args()
+
+ try:
+     try:
+         from model.RIFE_HDv2 import Model
+         model = Model()
+         model.load_model(args.modelDir, -1)
+         print("Loaded v2.x HD model.")
+     except:
+         from train_log.RIFE_HDv3 import Model
+         model = Model()
+         model.load_model(args.modelDir, -1)
+         print("Loaded v3.x HD model.")
+ except:
+     from model.RIFE_HD import Model
+     model = Model()
+     model.load_model(args.modelDir, -1)
+     print("Loaded v1.x HD model")
+ if not hasattr(model, 'version'):
+     model.version = 0
+ model.eval()
+ model.device()
+
+ if args.img[0].endswith('.exr') and args.img[1].endswith('.exr'):
+     img0 = cv2.imread(args.img[0], cv2.IMREAD_COLOR | cv2.IMREAD_ANYDEPTH)
+     img1 = cv2.imread(args.img[1], cv2.IMREAD_COLOR | cv2.IMREAD_ANYDEPTH)
+     img0 = (torch.tensor(img0.transpose(2, 0, 1)).to(device)).unsqueeze(0)
+     img1 = (torch.tensor(img1.transpose(2, 0, 1)).to(device)).unsqueeze(0)
+ else:
+     img0 = cv2.imread(args.img[0], cv2.IMREAD_UNCHANGED)
+     img1 = cv2.imread(args.img[1], cv2.IMREAD_UNCHANGED)
+     img0 = cv2.resize(img0, (448, 256))
+     img1 = cv2.resize(img1, (448, 256))
+     img0 = (torch.tensor(img0.transpose(2, 0, 1)).to(device) / 255.).unsqueeze(0)
+     img1 = (torch.tensor(img1.transpose(2, 0, 1)).to(device) / 255.).unsqueeze(0)
+
+ # pad both frames to a multiple of 64 before inference
+ n, c, h, w = img0.shape
+ ph = ((h - 1) // 64 + 1) * 64
+ pw = ((w - 1) // 64 + 1) * 64
+ padding = (0, pw - w, 0, ph - h)
+ img0 = F.pad(img0, padding)
+ img1 = F.pad(img1, padding)
+
+ if args.ratio:
+     if model.version >= 3.9:
+         img_list = [img0, model.inference(img0, img1, args.ratio), img1]
+     else:
+         img_list = [img0]  # start the output sequence with the first input frame
+         img0_ratio = 0.0
+         img1_ratio = 1.0
+         if args.ratio <= img0_ratio + args.rthreshold / 2:
+             middle = img0
+         elif args.ratio >= img1_ratio - args.rthreshold / 2:
+             middle = img1
+         else:
+             tmp_img0 = img0
+             tmp_img1 = img1
+             for inference_cycle in range(args.rmaxcycles):
+                 middle = model.inference(tmp_img0, tmp_img1)
+                 middle_ratio = (img0_ratio + img1_ratio) / 2
+                 if args.ratio - (args.rthreshold / 2) <= middle_ratio <= args.ratio + (args.rthreshold / 2):
+                     break
+                 if args.ratio > middle_ratio:
+                     tmp_img0 = middle
+                     img0_ratio = middle_ratio
+                 else:
+                     tmp_img1 = middle
+                     img1_ratio = middle_ratio
+         img_list.append(middle)
+         img_list.append(img1)
+ else:
+     if model.version >= 3.9:
+         img_list = [img0]
+         n = 2 ** args.exp
+         for i in range(n-1):
+             img_list.append(model.inference(img0, img1, (i+1) * 1. / n))
+         img_list.append(img1)
+     else:
+         img_list = [img0, img1]
+         for i in range(args.exp):
+             tmp = []
+             for j in range(len(img_list) - 1):
+                 mid = model.inference(img_list[j], img_list[j + 1])
+                 tmp.append(img_list[j])
+                 tmp.append(mid)
+             tmp.append(img1)
+             img_list = tmp
+
+ if not os.path.exists('output'):
+     os.mkdir('output')
+ for i in range(len(img_list)):
+     if args.img[0].endswith('.exr') and args.img[1].endswith('.exr'):
+         cv2.imwrite('output/img{}.exr'.format(i), (img_list[i][0]).cpu().numpy().transpose(1, 2, 0)[:h, :w], [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF])
+     else:
+         cv2.imwrite('output/img{}.png'.format(i), (img_list[i][0] * 255).byte().cpu().numpy().transpose(1, 2, 0)[:h, :w])
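The interpolated frames land in output/ as img0.png, img1.png, and so on; they are usually assembled into a GIF or video afterwards. A sketch of that step driven from Python, mirroring the ffmpeg command used in the Colab notebook above and assuming ffmpeg is on PATH:

```python
# Sketch: turn output/img0.png, img1.png, ... into a short GIF with ffmpeg (assumed installed).
import subprocess

subprocess.run([
    "ffmpeg", "-y", "-r", "10", "-f", "image2", "-i", "output/img%d.png",
    "-s", "448x256",
    "-vf", "split[s0][s1];[s0]palettegen=stats_mode=single[p];[s1][p]paletteuse=new=1",
    "output/slomo.gif",
], check=True)
```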
inference_img_SR.py ADDED
@@ -0,0 +1,69 @@
+ import os
+ import cv2
+ import torch
+ import argparse
+ from torch.nn import functional as F
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ torch.set_grad_enabled(False)
+ if torch.cuda.is_available():
+     torch.backends.cudnn.enabled = True
+     torch.backends.cudnn.benchmark = True
+
+ parser = argparse.ArgumentParser(description='STVSR for a pair of images')
+ parser.add_argument('--img', dest='img', nargs=2, required=True)
+ parser.add_argument('--exp', default=2, type=int)
+ parser.add_argument('--ratio', default=0, type=float, help='inference ratio between two images with 0 - 1 range')
+ parser.add_argument('--model', dest='modelDir', type=str, default='train_log', help='directory with trained model files')
+
+ args = parser.parse_args()
+
+ from train_log.model import Model
+ model = Model()
+ model.device()
+ model.load_model('train_log')
+ model.eval()
+
+ if args.img[0].endswith('.exr') and args.img[1].endswith('.exr'):
+     img0 = cv2.imread(args.img[0], cv2.IMREAD_COLOR | cv2.IMREAD_ANYDEPTH)
+     img1 = cv2.imread(args.img[1], cv2.IMREAD_COLOR | cv2.IMREAD_ANYDEPTH)
+     img0 = cv2.resize(img0, (0, 0), fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
+     img1 = cv2.resize(img1, (0, 0), fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
+     img0 = (torch.tensor(img0.transpose(2, 0, 1)).to(device)).unsqueeze(0)
+     img1 = (torch.tensor(img1.transpose(2, 0, 1)).to(device)).unsqueeze(0)
+ else:
+     img0 = cv2.imread(args.img[0], cv2.IMREAD_UNCHANGED)
+     img1 = cv2.imread(args.img[1], cv2.IMREAD_UNCHANGED)
+     img0 = cv2.resize(img0, (0, 0), fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
+     img1 = cv2.resize(img1, (0, 0), fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
+     img0 = (torch.tensor(img0.transpose(2, 0, 1)).to(device) / 255.).unsqueeze(0)
+     img1 = (torch.tensor(img1.transpose(2, 0, 1)).to(device) / 255.).unsqueeze(0)
+
+ n, c, h, w = img0.shape
+ ph = ((h - 1) // 32 + 1) * 32
+ pw = ((w - 1) // 32 + 1) * 32
+ padding = (0, pw - w, 0, ph - h)
+ img0 = F.pad(img0, padding)
+ img1 = F.pad(img1, padding)
+
+ if args.ratio:
+     print('ratio={}'.format(args.ratio))
+     img_list = model.inference(img0, img1, timestep=args.ratio)
+ else:
+     n = 2 ** args.exp - 1
+     time_list = [0]
+     for i in range(n):
+         time_list.append((i+1) * 1. / (n+1))
+     time_list.append(1)
+     print(time_list)
+     img_list = model.inference(img0, img1, timestep=time_list)
+
+ if not os.path.exists('output'):
+     os.mkdir('output')
+ for i in range(len(img_list)):
+     if args.img[0].endswith('.exr') and args.img[1].endswith('.exr'):
+         cv2.imwrite('output/img{}.exr'.format(i), (img_list[i][0]).cpu().numpy().transpose(1, 2, 0)[:h, :w], [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF])
+     else:
+         cv2.imwrite('output/img{}.png'.format(i), (img_list[i][0] * 255).byte().cpu().numpy().transpose(1, 2, 0)[:h, :w])
inference_video.py ADDED
@@ -0,0 +1,290 @@
+ import os
+ import cv2
+ import torch
+ import argparse
+ import numpy as np
+ from tqdm import tqdm
+ from torch.nn import functional as F
+ import warnings
+ import _thread
+ import skvideo.io
+ from queue import Queue, Empty
+ from model.pytorch_msssim import ssim_matlab
+
+ warnings.filterwarnings("ignore")
+
+ def transferAudio(sourceVideo, targetVideo):
+     import shutil
+     import moviepy.editor
+     tempAudioFileName = "./temp/audio.mkv"
+
+     # split audio from original video file and store in "temp" directory
+     if True:
+
+         # clear old "temp" directory if it exits
+         if os.path.isdir("temp"):
+             # remove temp directory
+             shutil.rmtree("temp")
+         # create new "temp" directory
+         os.makedirs("temp")
+         # extract audio from video
+         os.system('ffmpeg -y -i "{}" -c:a copy -vn {}'.format(sourceVideo, tempAudioFileName))
+
+     targetNoAudio = os.path.splitext(targetVideo)[0] + "_noaudio" + os.path.splitext(targetVideo)[1]
+     os.rename(targetVideo, targetNoAudio)
+     # combine audio file and new video file
+     os.system('ffmpeg -y -i "{}" -i {} -c copy "{}"'.format(targetNoAudio, tempAudioFileName, targetVideo))
+
+     if os.path.getsize(targetVideo) == 0: # if ffmpeg failed to merge the video and audio together try converting the audio to aac
+         tempAudioFileName = "./temp/audio.m4a"
+         os.system('ffmpeg -y -i "{}" -c:a aac -b:a 160k -vn {}'.format(sourceVideo, tempAudioFileName))
+         os.system('ffmpeg -y -i "{}" -i {} -c copy "{}"'.format(targetNoAudio, tempAudioFileName, targetVideo))
+         if (os.path.getsize(targetVideo) == 0): # if aac is not supported by selected format
+             os.rename(targetNoAudio, targetVideo)
+             print("Audio transfer failed. Interpolated video will have no audio")
+         else:
+             print("Lossless audio transfer failed. Audio was transcoded to AAC (M4A) instead.")
+
+             # remove audio-less video
+             os.remove(targetNoAudio)
+     else:
+         os.remove(targetNoAudio)
+
+     # remove temp directory
+     shutil.rmtree("temp")
+
+ parser = argparse.ArgumentParser(description='Interpolation for a pair of images')
+ parser.add_argument('--video', dest='video', type=str, default=None)
+ parser.add_argument('--output', dest='output', type=str, default=None)
+ parser.add_argument('--img', dest='img', type=str, default=None)
+ parser.add_argument('--montage', dest='montage', action='store_true', help='montage origin video')
+ parser.add_argument('--model', dest='modelDir', type=str, default='train_log', help='directory with trained model files')
+ parser.add_argument('--fp16', dest='fp16', action='store_true', help='fp16 mode for faster and more lightweight inference on cards with Tensor Cores')
+ parser.add_argument('--UHD', dest='UHD', action='store_true', help='support 4k video')
+ parser.add_argument('--scale', dest='scale', type=float, default=1.0, help='Try scale=0.5 for 4k video')
+ parser.add_argument('--skip', dest='skip', action='store_true', help='whether to remove static frames before processing')
+ parser.add_argument('--fps', dest='fps', type=int, default=None)
+ parser.add_argument('--png', dest='png', action='store_true', help='whether to vid_out png format vid_outs')
+ parser.add_argument('--ext', dest='ext', type=str, default='mp4', help='vid_out video extension')
+ parser.add_argument('--exp', dest='exp', type=int, default=1)
+ parser.add_argument('--multi', dest='multi', type=int, default=2)
+
+ args = parser.parse_args()
+ if args.exp != 1:
+     args.multi = (2 ** args.exp)
+ assert (not args.video is None or not args.img is None)
+ if args.skip:
+     print("skip flag is abandoned, please refer to issue #207.")
+ if args.UHD and args.scale==1.0:
+     args.scale = 0.5
+ assert args.scale in [0.25, 0.5, 1.0, 2.0, 4.0]
+ if not args.img is None:
+     args.png = True
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ torch.set_grad_enabled(False)
+ if torch.cuda.is_available():
+     torch.backends.cudnn.enabled = True
+     torch.backends.cudnn.benchmark = True
+     if(args.fp16):
+         torch.set_default_tensor_type(torch.cuda.HalfTensor)
+
+ from train_log.RIFE_HDv3 import Model
+ model = Model()
+ if not hasattr(model, 'version'):
+     model.version = 0
+ model.load_model(args.modelDir, -1)
+ print("Loaded 3.x/4.x HD model.")
+ model.eval()
+ model.device()
+
+ if not args.video is None:
+     videoCapture = cv2.VideoCapture(args.video)
+     fps = videoCapture.get(cv2.CAP_PROP_FPS)
+     tot_frame = videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)
+     videoCapture.release()
+     if args.fps is None:
+         fpsNotAssigned = True
+         args.fps = fps * args.multi
+     else:
+         fpsNotAssigned = False
+     videogen = skvideo.io.vreader(args.video)
+     lastframe = next(videogen)
+     fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+     video_path_wo_ext, ext = os.path.splitext(args.video)
+     print('{}.{}, {} frames in total, {}FPS to {}FPS'.format(video_path_wo_ext, args.ext, tot_frame, fps, args.fps))
+     if args.png == False and fpsNotAssigned == True:
+         print("The audio will be merged after interpolation process")
+     else:
+         print("Will not merge audio because using png or fps flag!")
+ else:
+     videogen = []
+     for f in os.listdir(args.img):
+         if 'png' in f:
+             videogen.append(f)
+     tot_frame = len(videogen)
+     videogen.sort(key= lambda x:int(x[:-4]))
+     lastframe = cv2.imread(os.path.join(args.img, videogen[0]), cv2.IMREAD_UNCHANGED)[:, :, ::-1].copy()
+     videogen = videogen[1:]
+ h, w, _ = lastframe.shape
+ vid_out_name = None
+ vid_out = None
+ if args.png:
+     if not os.path.exists('vid_out'):
+         os.mkdir('vid_out')
+ else:
+     if args.output is not None:
+         vid_out_name = args.output
+     else:
+         vid_out_name = '{}_{}X_{}fps.{}'.format(video_path_wo_ext, args.multi, int(np.round(args.fps)), args.ext)
+     vid_out = cv2.VideoWriter(vid_out_name, fourcc, args.fps, (w, h))
+
+ def clear_write_buffer(user_args, write_buffer):
+     cnt = 0
+     while True:
+         item = write_buffer.get()
+         if item is None:
+             break
+         if user_args.png:
+             cv2.imwrite('vid_out/{:0>7d}.png'.format(cnt), item[:, :, ::-1])
+             cnt += 1
+         else:
+             vid_out.write(item[:, :, ::-1])
+
+ def build_read_buffer(user_args, read_buffer, videogen):
+     try:
+         for frame in videogen:
+             if not user_args.img is None:
+                 frame = cv2.imread(os.path.join(user_args.img, frame), cv2.IMREAD_UNCHANGED)[:, :, ::-1].copy()
+             if user_args.montage:
+                 frame = frame[:, left: left + w]
+             read_buffer.put(frame)
+     except:
+         pass
+     read_buffer.put(None)
+
+ def make_inference(I0, I1, n):
+     global model
+     if model.version >= 3.9:
+         res = []
+         for i in range(n):
+             res.append(model.inference(I0, I1, (i+1) * 1. / (n+1), args.scale))
+         return res
+     else:
+         middle = model.inference(I0, I1, args.scale)
+         if n == 1:
+             return [middle]
+         first_half = make_inference(I0, middle, n=n//2)
+         second_half = make_inference(middle, I1, n=n//2)
+         if n%2:
+             return [*first_half, middle, *second_half]
+         else:
+             return [*first_half, *second_half]
+
+ def pad_image(img):
+     if(args.fp16):
+         return F.pad(img, padding).half()
+     else:
+         return F.pad(img, padding)
+
+ if args.montage:
+     left = w // 4
+     w = w // 2
+ tmp = max(128, int(128 / args.scale))
+ ph = ((h - 1) // tmp + 1) * tmp
+ pw = ((w - 1) // tmp + 1) * tmp
+ padding = (0, pw - w, 0, ph - h)
+ pbar = tqdm(total=tot_frame)
+ if args.montage:
+     lastframe = lastframe[:, left: left + w]
+ write_buffer = Queue(maxsize=500)
+ read_buffer = Queue(maxsize=500)
+ _thread.start_new_thread(build_read_buffer, (args, read_buffer, videogen))
+ _thread.start_new_thread(clear_write_buffer, (args, write_buffer))
+
+ I1 = torch.from_numpy(np.transpose(lastframe, (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.
+ I1 = pad_image(I1)
+ temp = None # save lastframe when processing static frame
+
+ while True:
+     if temp is not None:
+         frame = temp
+         temp = None
+     else:
+         frame = read_buffer.get()
+     if frame is None:
+         break
+     I0 = I1
+     I1 = torch.from_numpy(np.transpose(frame, (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.
+     I1 = pad_image(I1)
+     I0_small = F.interpolate(I0, (32, 32), mode='bilinear', align_corners=False)
+     I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False)
+     ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
+
+     break_flag = False
+     if ssim > 0.996:
+         frame = read_buffer.get() # read a new frame
+         if frame is None:
+             break_flag = True
+             frame = lastframe
+         else:
+             temp = frame
+         I1 = torch.from_numpy(np.transpose(frame, (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.
+         I1 = pad_image(I1)
+         I1 = model.inference(I0, I1, scale=args.scale)
+         I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False)
+         ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
+         frame = (I1[0] * 255).byte().cpu().numpy().transpose(1, 2, 0)[:h, :w]
+
+     if ssim < 0.2:
+         output = []
+         for i in range(args.multi - 1):
+             output.append(I0)
+         '''
+         output = []
+         step = 1 / args.multi
+         alpha = 0
+         for i in range(args.multi - 1):
+             alpha += step
+             beta = 1-alpha
+             output.append(torch.from_numpy(np.transpose((cv2.addWeighted(frame[:, :, ::-1], alpha, lastframe[:, :, ::-1], beta, 0)[:, :, ::-1].copy()), (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.)
+         '''
+     else:
+         output = make_inference(I0, I1, args.multi - 1)
+
+     if args.montage:
+         write_buffer.put(np.concatenate((lastframe, lastframe), 1))
+         for mid in output:
+             mid = (((mid[0] * 255.).byte().cpu().numpy().transpose(1, 2, 0)))
+             write_buffer.put(np.concatenate((lastframe, mid[:h, :w]), 1))
+     else:
+         write_buffer.put(lastframe)
+         for mid in output:
+             mid = (((mid[0] * 255.).byte().cpu().numpy().transpose(1, 2, 0)))
+             write_buffer.put(mid[:h, :w])
+     pbar.update(1)
+     lastframe = frame
+     if break_flag:
+         break
+
+ if args.montage:
+     write_buffer.put(np.concatenate((lastframe, lastframe), 1))
+ else:
+     write_buffer.put(lastframe)
+ write_buffer.put(None)
+
+ import time
+ while(not write_buffer.empty()):
+     time.sleep(0.1)
+ pbar.close()
+ if not vid_out is None:
+     vid_out.release()
+
+ # move audio to new video file if appropriate
+ if args.png == False and fpsNotAssigned == True and not args.video is None:
+     try:
+         transferAudio(args.video, vid_out_name)
+     except:
+         print("Audio transfer failed. Interpolated video will have no audio")
+         targetNoAudio = os.path.splitext(vid_out_name)[0] + "_noaudio" + os.path.splitext(vid_out_name)[1]
+         os.rename(targetNoAudio, vid_out_name)
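inference_video.py decides what to do with each consecutive frame pair from a low-resolution SSIM check: near-duplicate frames (SSIM > 0.996) trigger a look-ahead to the next frame, and likely scene cuts (SSIM < 0.2) are filled by repeating the first frame rather than interpolating across the cut. A simplified sketch of the per-pair write policy, with the thresholds and timesteps taken from the script (the helper name is hypothetical):

```python
# Sketch: the frame-pair policy used in the main loop above (thresholds from the script).
def plan_pair(ssim, multi):
    """Describe what gets written between two consecutive frames I0 and I1."""
    if ssim < 0.2:
        # likely scene cut: repeat I0 instead of hallucinating frames across the cut
        return ["I0"] * (multi - 1)
    # normal case: request multi-1 intermediate timesteps from the model
    return [f"interp(t={(i + 1) / multi:.2f})" for i in range(multi - 1)]

print(plan_pair(0.95, 4))  # ['interp(t=0.25)', 'interp(t=0.50)', 'interp(t=0.75)']
print(plan_pair(0.10, 4))  # ['I0', 'I0', 'I0']
```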
inference_video_enhance.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import argparse
5
+ import numpy as np
6
+ from tqdm import tqdm
7
+ from torch.nn import functional as F
8
+ import warnings
9
+ import _thread
10
+ import skvideo.io
11
+ from queue import Queue, Empty
12
+ from model.pytorch_msssim import ssim_matlab
13
+
14
+ warnings.filterwarnings("ignore")
15
+
16
+ def transferAudio(sourceVideo, targetVideo):
17
+ import shutil
18
+ import moviepy.editor
19
+ tempAudioFileName = "./temp/audio.mkv"
20
+
21
+ # split audio from original video file and store in "temp" directory
22
+ if True:
23
+
24
+ # clear old "temp" directory if it exits
25
+ if os.path.isdir("temp"):
26
+ # remove temp directory
27
+ shutil.rmtree("temp")
28
+ # create new "temp" directory
29
+ os.makedirs("temp")
30
+ # extract audio from video
31
+ os.system('ffmpeg -y -i "{}" -c:a copy -vn {}'.format(sourceVideo, tempAudioFileName))
32
+
33
+ targetNoAudio = os.path.splitext(targetVideo)[0] + "_noaudio" + os.path.splitext(targetVideo)[1]
34
+ os.rename(targetVideo, targetNoAudio)
35
+ # combine audio file and new video file
36
+ os.system('ffmpeg -y -i "{}" -i {} -c copy "{}"'.format(targetNoAudio, tempAudioFileName, targetVideo))
37
+
38
+ if os.path.getsize(targetVideo) == 0: # if ffmpeg failed to merge the video and audio together try converting the audio to aac
39
+ tempAudioFileName = "./temp/audio.m4a"
40
+ os.system('ffmpeg -y -i "{}" -c:a aac -b:a 160k -vn {}'.format(sourceVideo, tempAudioFileName))
41
+ os.system('ffmpeg -y -i "{}" -i {} -c copy "{}"'.format(targetNoAudio, tempAudioFileName, targetVideo))
42
+ if (os.path.getsize(targetVideo) == 0): # if aac is not supported by selected format
43
+ os.rename(targetNoAudio, targetVideo)
44
+ print("Audio transfer failed. Interpolated video will have no audio")
45
+ else:
46
+ print("Lossless audio transfer failed. Audio was transcoded to AAC (M4A) instead.")
47
+
48
+ # remove audio-less video
49
+ os.remove(targetNoAudio)
50
+ else:
51
+ os.remove(targetNoAudio)
52
+
53
+ # remove temp directory
54
+ shutil.rmtree("temp")
55
+
56
+ parser = argparse.ArgumentParser(description='Video SR')
57
+ parser.add_argument('--video', dest='video', type=str, default=None)
58
+ parser.add_argument('--output', dest='output', type=str, default=None)
59
+ parser.add_argument('--img', dest='img', type=str, default=None)
60
+ parser.add_argument('--model', dest='modelDir', type=str, default='train_log_SAFA', help='directory with trained model files')
61
+ parser.add_argument('--fp16', dest='fp16', action='store_true', help='fp16 mode for faster and more lightweight inference on cards with Tensor Cores')
62
+ parser.add_argument('--png', dest='png', action='store_true', help='whether to vid_out png format vid_outs')
63
+ parser.add_argument('--ext', dest='ext', type=str, default='mp4', help='vid_out video extension')
64
+
65
+ args = parser.parse_args()
66
+ assert (not args.video is None or not args.img is None)
67
+ if not args.img is None:
68
+ args.png = True
69
+
70
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
71
+ torch.set_grad_enabled(False)
72
+ if torch.cuda.is_available():
73
+ torch.backends.cudnn.enabled = True
74
+ torch.backends.cudnn.benchmark = True
75
+ if(args.fp16):
76
+ print('set fp16')
77
+ torch.set_default_tensor_type(torch.cuda.HalfTensor)
78
+
79
+ try:
80
+ from train_log_SAFA.model import Model
81
+ except:
82
+ print("Please download our model from model list")
83
+ model = Model()
84
+ model.device()
85
+ model.load_model(args.modelDir)
86
+ print("Loaded SAFA model.")
87
+ model.eval()
88
+
89
+ if not args.video is None:
90
+ videoCapture = cv2.VideoCapture(args.video)
91
+ fps = videoCapture.get(cv2.CAP_PROP_FPS)
92
+ tot_frame = videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)
93
+ videoCapture.release()
94
+ fpsNotAssigned = True
95
+ videogen = skvideo.io.vreader(args.video)
96
+ lastframe = next(videogen)
97
+ fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
98
+ video_path_wo_ext, ext = os.path.splitext(args.video)
99
+ if args.png == False and fpsNotAssigned == True:
100
+ print("The audio will be merged after interpolation process")
101
+ else:
102
+ print("Will not merge audio because using png or fps flag!")
103
+ else:
104
+ videogen = []
105
+ for f in os.listdir(args.img):
106
+ if 'png' in f:
107
+ videogen.append(f)
108
+ tot_frame = len(videogen)
109
+ videogen.sort(key= lambda x:int(x[:-4]))
110
+ lastframe = cv2.imread(os.path.join(args.img, videogen[0]), cv2.IMREAD_UNCHANGED)[:, :, ::-1].copy()
111
+ videogen = videogen[1:]
112
+
113
+ h, w, _ = lastframe.shape
114
+
115
+ vid_out_name = None
116
+ vid_out = None
117
+ if args.png:
118
+ if not os.path.exists('vid_out'):
119
+ os.mkdir('vid_out')
120
+ else:
121
+ if args.output is not None:
122
+ vid_out_name = args.output
123
+ else:
124
+ vid_out_name = '{}_2X{}'.format(video_path_wo_ext, ext)
125
+ vid_out = cv2.VideoWriter(vid_out_name, fourcc, fps, (w, h))
126
+
127
+ def clear_write_buffer(user_args, write_buffer):
128
+ cnt = 0
129
+ while True:
130
+ item = write_buffer.get()
131
+ if item is None:
132
+ break
133
+ if user_args.png:
134
+ cv2.imwrite('vid_out/{:0>7d}.png'.format(cnt), item[:, :, ::-1])
135
+ cnt += 1
136
+ else:
137
+ vid_out.write(item[:, :, ::-1])
138
+
139
+ def build_read_buffer(user_args, read_buffer, videogen):
140
+ for frame in videogen:
141
+ if not user_args.img is None:
142
+ frame = cv2.imread(os.path.join(user_args.img, frame), cv2.IMREAD_UNCHANGED)[:, :, ::-1].copy()
143
+ # if user_args.montage:
144
+ # frame = frame[:, left: left + w]
145
+ read_buffer.put(frame)
146
+ read_buffer.put(None)
147
+
148
+ def pad_image(img):
149
+ if(args.fp16):
150
+ return F.pad(img, padding, mode='reflect').half()
151
+ else:
152
+ return F.pad(img, padding, mode='reflect')
153
+
154
+ tmp = 64
155
+ ph = ((h - 1) // tmp + 1) * tmp
156
+ pw = ((w - 1) // tmp + 1) * tmp
157
+ padding = (0, pw - w, 0, ph - h)
158
+ pbar = tqdm(total=tot_frame)
159
+ write_buffer = Queue(maxsize=500)
160
+ read_buffer = Queue(maxsize=500)
161
+ _thread.start_new_thread(build_read_buffer, (args, read_buffer, videogen))
162
+ _thread.start_new_thread(clear_write_buffer, (args, write_buffer))
163
+
164
+ while True:
165
+ frame = read_buffer.get()
166
+ if frame is None:
167
+ break
168
+ # lastframe_2x = cv2.resize(lastframe, (0, 0), fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
169
+ # frame_2x = cv2.resize(frame, (0, 0), fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
170
+ I0 = pad_image(torch.from_numpy(np.transpose(lastframe, (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.)
171
+ I1 = pad_image(torch.from_numpy(np.transpose(frame, (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.)
172
+ I0_small = F.interpolate(I0, (32, 32), mode='bilinear', align_corners=False)
173
+ I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False)
174
+ ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
175
+ if ssim < 0.2:
176
+ out = [model.inference(I0, I0, [0])[0], model.inference(I1, I1, [0])[0]]
177
+ else:
178
+ out = model.inference(I0, I1, [0, 1])
179
+ assert(len(out) == 2)
180
+ write_buffer.put((out[0][0] * 255).byte().cpu().numpy().transpose(1, 2, 0)[:h, :w])
181
+ write_buffer.put((out[1][0] * 255).byte().cpu().numpy().transpose(1, 2, 0)[:h, :w])
182
+ lastframe = read_buffer.get()
183
+ if lastframe is None:
184
+ break
185
+ pbar.update(2)
186
+
187
+ import time
188
+ while(not write_buffer.empty()):
189
+ time.sleep(0.1)
190
+ pbar.close()
191
+ if vid_out is not None:
192
+ vid_out.release()
193
+
194
+ # move audio to new video file if appropriate
195
+ if not args.png and fpsNotAssigned and args.video is not None:
196
+ try:
197
+ transferAudio(args.video, vid_out_name)
198
+ except:
199
+ print("Audio transfer failed. Interpolated video will have no audio")
200
+ targetNoAudio = os.path.splitext(vid_out_name)[0] + "_noaudio" + os.path.splitext(vid_out_name)[1]
201
+ os.rename(targetNoAudio, vid_out_name)
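A minimal sketch of the pad-and-crop arithmetic used by the loop above, with a hypothetical 1280x720 frame; the multiple-of-64 rounding and the [:h, :w] crop mirror the code, the rest is illustrative:

import torch
import torch.nn.functional as F

h, w, tmp = 720, 1280, 64
ph = ((h - 1) // tmp + 1) * tmp       # 768, the next multiple of 64 >= h
pw = ((w - 1) // tmp + 1) * tmp       # 1280, already a multiple of 64
padding = (0, pw - w, 0, ph - h)      # pad only on the right and bottom

frame = torch.rand(1, 3, h, w)                  # stand-in for a normalized video frame
padded = F.pad(frame, padding, mode='reflect')  # shape (1, 3, 768, 1280)
restored = padded[:, :, :h, :w]                 # crop back to the original size after inference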
model/loss.py ADDED
@@ -0,0 +1,128 @@
1
+ import torch
2
+ import numpy as np
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import torchvision.models as models
6
+
7
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
+
9
+
10
+ class EPE(nn.Module):
11
+ def __init__(self):
12
+ super(EPE, self).__init__()
13
+
14
+ def forward(self, flow, gt, loss_mask):
15
+ loss_map = (flow - gt.detach()) ** 2
16
+ loss_map = (loss_map.sum(1, True) + 1e-6) ** 0.5
17
+ return (loss_map * loss_mask)
18
+
19
+
20
+ class Ternary(nn.Module):
21
+ def __init__(self):
22
+ super(Ternary, self).__init__()
23
+ patch_size = 7
24
+ out_channels = patch_size * patch_size
25
+ self.w = np.eye(out_channels).reshape(
26
+ (patch_size, patch_size, 1, out_channels))
27
+ self.w = np.transpose(self.w, (3, 2, 0, 1))
28
+ self.w = torch.tensor(self.w).float().to(device)
29
+
30
+ def transform(self, img):
31
+ patches = F.conv2d(img, self.w, padding=3, bias=None)
32
+ transf = patches - img
33
+ transf_norm = transf / torch.sqrt(0.81 + transf**2)
34
+ return transf_norm
35
+
36
+ def rgb2gray(self, rgb):
37
+ r, g, b = rgb[:, 0:1, :, :], rgb[:, 1:2, :, :], rgb[:, 2:3, :, :]
38
+ gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
39
+ return gray
40
+
41
+ def hamming(self, t1, t2):
42
+ dist = (t1 - t2) ** 2
43
+ dist_norm = torch.mean(dist / (0.1 + dist), 1, True)
44
+ return dist_norm
45
+
46
+ def valid_mask(self, t, padding):
47
+ n, _, h, w = t.size()
48
+ inner = torch.ones(n, 1, h - 2 * padding, w - 2 * padding).type_as(t)
49
+ mask = F.pad(inner, [padding] * 4)
50
+ return mask
51
+
52
+ def forward(self, img0, img1):
53
+ img0 = self.transform(self.rgb2gray(img0))
54
+ img1 = self.transform(self.rgb2gray(img1))
55
+ return self.hamming(img0, img1) * self.valid_mask(img0, 1)
56
+
57
+
58
+ class SOBEL(nn.Module):
59
+ def __init__(self):
60
+ super(SOBEL, self).__init__()
61
+ self.kernelX = torch.tensor([
62
+ [1, 0, -1],
63
+ [2, 0, -2],
64
+ [1, 0, -1],
65
+ ]).float()
66
+ self.kernelY = self.kernelX.clone().T
67
+ self.kernelX = self.kernelX.unsqueeze(0).unsqueeze(0).to(device)
68
+ self.kernelY = self.kernelY.unsqueeze(0).unsqueeze(0).to(device)
69
+
70
+ def forward(self, pred, gt):
71
+ N, C, H, W = pred.shape[0], pred.shape[1], pred.shape[2], pred.shape[3]
72
+ img_stack = torch.cat(
73
+ [pred.reshape(N*C, 1, H, W), gt.reshape(N*C, 1, H, W)], 0)
74
+ sobel_stack_x = F.conv2d(img_stack, self.kernelX, padding=1)
75
+ sobel_stack_y = F.conv2d(img_stack, self.kernelY, padding=1)
76
+ pred_X, gt_X = sobel_stack_x[:N*C], sobel_stack_x[N*C:]
77
+ pred_Y, gt_Y = sobel_stack_y[:N*C], sobel_stack_y[N*C:]
78
+
79
+ L1X, L1Y = torch.abs(pred_X-gt_X), torch.abs(pred_Y-gt_Y)
80
+ loss = (L1X+L1Y)
81
+ return loss
82
+
83
+ class MeanShift(nn.Conv2d):
84
+ def __init__(self, data_mean, data_std, data_range=1, norm=True):
85
+ c = len(data_mean)
86
+ super(MeanShift, self).__init__(c, c, kernel_size=1)
87
+ std = torch.Tensor(data_std)
88
+ self.weight.data = torch.eye(c).view(c, c, 1, 1)
89
+ if norm:
90
+ self.weight.data.div_(std.view(c, 1, 1, 1))
91
+ self.bias.data = -1 * data_range * torch.Tensor(data_mean)
92
+ self.bias.data.div_(std)
93
+ else:
94
+ self.weight.data.mul_(std.view(c, 1, 1, 1))
95
+ self.bias.data = data_range * torch.Tensor(data_mean)
96
+ self.requires_grad = False
97
+
98
+ class VGGPerceptualLoss(torch.nn.Module):
99
+ def __init__(self, rank=0):
100
+ super(VGGPerceptualLoss, self).__init__()
101
+ blocks = []
102
+ pretrained = True
103
+ self.vgg_pretrained_features = models.vgg19(pretrained=pretrained).features
104
+ self.normalize = MeanShift([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], norm=True).cuda()
105
+ for param in self.parameters():
106
+ param.requires_grad = False
107
+
108
+ def forward(self, X, Y, indices=None):
109
+ X = self.normalize(X)
110
+ Y = self.normalize(Y)
111
+ indices = [2, 7, 12, 21, 30]
112
+ weights = [1.0/2.6, 1.0/4.8, 1.0/3.7, 1.0/5.6, 10/1.5]
113
+ k = 0
114
+ loss = 0
115
+ for i in range(indices[-1]):
116
+ X = self.vgg_pretrained_features[i](X)
117
+ Y = self.vgg_pretrained_features[i](Y)
118
+ if (i+1) in indices:
119
+ loss += weights[k] * (X - Y.detach()).abs().mean() * 0.1
120
+ k += 1
121
+ return loss
122
+
123
+ if __name__ == '__main__':
124
+ img0 = torch.zeros(3, 3, 256, 256).float().to(device)
125
+ img1 = torch.tensor(np.random.normal(
126
+ 0, 1, (3, 3, 256, 256))).float().to(device)
127
+ ternary_loss = Ternary()
128
+ print(ternary_loss(img0, img1).shape)
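A minimal usage sketch of the losses defined above, assuming the repository layout in this commit (model/loss.py importable as model.loss) and random tensors in place of real frames:

import torch
from model.loss import Ternary, SOBEL, device

pred = torch.rand(2, 3, 128, 128).to(device)
gt = torch.rand(2, 3, 128, 128).to(device)

census = Ternary()   # census-transform (ternary) descriptor distance
edge = SOBEL()       # L1 distance between Sobel edge maps

print(census(pred, gt).shape)  # per-pixel map of shape (2, 1, 128, 128)
print(edge(pred, gt).mean())   # reduce the edge map to a scalar loss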
model/pytorch_msssim/__init__.py ADDED
@@ -0,0 +1,200 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from math import exp
4
+ import numpy as np
5
+
6
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7
+
8
+ def gaussian(window_size, sigma):
9
+ gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)])
10
+ return gauss/gauss.sum()
11
+
12
+
13
+ def create_window(window_size, channel=1):
14
+ _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
15
+ _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0).to(device)
16
+ window = _2D_window.expand(channel, 1, window_size, window_size).contiguous()
17
+ return window
18
+
19
+ def create_window_3d(window_size, channel=1):
20
+ _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
21
+ _2D_window = _1D_window.mm(_1D_window.t())
22
+ _3D_window = _2D_window.unsqueeze(2) @ (_1D_window.t())
23
+ window = _3D_window.expand(1, channel, window_size, window_size, window_size).contiguous().to(device)
24
+ return window
25
+
26
+
27
+ def ssim(img1, img2, window_size=11, window=None, size_average=True, full=False, val_range=None):
28
+ # Value range can be different from 255. Other common ranges are 1 (sigmoid) and 2 (tanh).
29
+ if val_range is None:
30
+ if torch.max(img1) > 128:
31
+ max_val = 255
32
+ else:
33
+ max_val = 1
34
+
35
+ if torch.min(img1) < -0.5:
36
+ min_val = -1
37
+ else:
38
+ min_val = 0
39
+ L = max_val - min_val
40
+ else:
41
+ L = val_range
42
+
43
+ padd = 0
44
+ (_, channel, height, width) = img1.size()
45
+ if window is None:
46
+ real_size = min(window_size, height, width)
47
+ window = create_window(real_size, channel=channel).to(img1.device)
48
+
49
+ # mu1 = F.conv2d(img1, window, padding=padd, groups=channel)
50
+ # mu2 = F.conv2d(img2, window, padding=padd, groups=channel)
51
+ mu1 = F.conv2d(F.pad(img1, (5, 5, 5, 5), mode='replicate'), window, padding=padd, groups=channel)
52
+ mu2 = F.conv2d(F.pad(img2, (5, 5, 5, 5), mode='replicate'), window, padding=padd, groups=channel)
53
+
54
+ mu1_sq = mu1.pow(2)
55
+ mu2_sq = mu2.pow(2)
56
+ mu1_mu2 = mu1 * mu2
57
+
58
+ sigma1_sq = F.conv2d(F.pad(img1 * img1, (5, 5, 5, 5), 'replicate'), window, padding=padd, groups=channel) - mu1_sq
59
+ sigma2_sq = F.conv2d(F.pad(img2 * img2, (5, 5, 5, 5), 'replicate'), window, padding=padd, groups=channel) - mu2_sq
60
+ sigma12 = F.conv2d(F.pad(img1 * img2, (5, 5, 5, 5), 'replicate'), window, padding=padd, groups=channel) - mu1_mu2
61
+
62
+ C1 = (0.01 * L) ** 2
63
+ C2 = (0.03 * L) ** 2
64
+
65
+ v1 = 2.0 * sigma12 + C2
66
+ v2 = sigma1_sq + sigma2_sq + C2
67
+ cs = torch.mean(v1 / v2) # contrast sensitivity
68
+
69
+ ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)
70
+
71
+ if size_average:
72
+ ret = ssim_map.mean()
73
+ else:
74
+ ret = ssim_map.mean(1).mean(1).mean(1)
75
+
76
+ if full:
77
+ return ret, cs
78
+ return ret
79
+
80
+
81
+ def ssim_matlab(img1, img2, window_size=11, window=None, size_average=True, full=False, val_range=None):
82
+ # Value range can be different from 255. Other common ranges are 1 (sigmoid) and 2 (tanh).
83
+ if val_range is None:
84
+ if torch.max(img1) > 128:
85
+ max_val = 255
86
+ else:
87
+ max_val = 1
88
+
89
+ if torch.min(img1) < -0.5:
90
+ min_val = -1
91
+ else:
92
+ min_val = 0
93
+ L = max_val - min_val
94
+ else:
95
+ L = val_range
96
+
97
+ padd = 0
98
+ (_, _, height, width) = img1.size()
99
+ if window is None:
100
+ real_size = min(window_size, height, width)
101
+ window = create_window_3d(real_size, channel=1).to(img1.device)
102
+ # Channel is set to 1 since we consider color images as volumetric images
103
+
104
+ img1 = img1.unsqueeze(1)
105
+ img2 = img2.unsqueeze(1)
106
+
107
+ mu1 = F.conv3d(F.pad(img1, (5, 5, 5, 5, 5, 5), mode='replicate'), window, padding=padd, groups=1)
108
+ mu2 = F.conv3d(F.pad(img2, (5, 5, 5, 5, 5, 5), mode='replicate'), window, padding=padd, groups=1)
109
+
110
+ mu1_sq = mu1.pow(2)
111
+ mu2_sq = mu2.pow(2)
112
+ mu1_mu2 = mu1 * mu2
113
+
114
+ sigma1_sq = F.conv3d(F.pad(img1 * img1, (5, 5, 5, 5, 5, 5), 'replicate'), window, padding=padd, groups=1) - mu1_sq
115
+ sigma2_sq = F.conv3d(F.pad(img2 * img2, (5, 5, 5, 5, 5, 5), 'replicate'), window, padding=padd, groups=1) - mu2_sq
116
+ sigma12 = F.conv3d(F.pad(img1 * img2, (5, 5, 5, 5, 5, 5), 'replicate'), window, padding=padd, groups=1) - mu1_mu2
117
+
118
+ C1 = (0.01 * L) ** 2
119
+ C2 = (0.03 * L) ** 2
120
+
121
+ v1 = 2.0 * sigma12 + C2
122
+ v2 = sigma1_sq + sigma2_sq + C2
123
+ cs = torch.mean(v1 / v2) # contrast sensitivity
124
+
125
+ ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)
126
+
127
+ if size_average:
128
+ ret = ssim_map.mean()
129
+ else:
130
+ ret = ssim_map.mean(1).mean(1).mean(1)
131
+
132
+ if full:
133
+ return ret, cs
134
+ return ret
135
+
136
+
137
+ def msssim(img1, img2, window_size=11, size_average=True, val_range=None, normalize=False):
138
+ device = img1.device
139
+ weights = torch.FloatTensor([0.0448, 0.2856, 0.3001, 0.2363, 0.1333]).to(device)
140
+ levels = weights.size()[0]
141
+ mssim = []
142
+ mcs = []
143
+ for _ in range(levels):
144
+ sim, cs = ssim(img1, img2, window_size=window_size, size_average=size_average, full=True, val_range=val_range)
145
+ mssim.append(sim)
146
+ mcs.append(cs)
147
+
148
+ img1 = F.avg_pool2d(img1, (2, 2))
149
+ img2 = F.avg_pool2d(img2, (2, 2))
150
+
151
+ mssim = torch.stack(mssim)
152
+ mcs = torch.stack(mcs)
153
+
154
+ # Normalize (to avoid NaNs when training unstable models; not compliant with the original definition)
155
+ if normalize:
156
+ mssim = (mssim + 1) / 2
157
+ mcs = (mcs + 1) / 2
158
+
159
+ pow1 = mcs ** weights
160
+ pow2 = mssim ** weights
161
+ # From Matlab implementation https://ece.uwaterloo.ca/~z70wang/research/iwssim/
162
+ output = torch.prod(pow1[:-1] * pow2[-1])
163
+ return output
164
+
165
+
166
+ # Classes to re-use window
167
+ class SSIM(torch.nn.Module):
168
+ def __init__(self, window_size=11, size_average=True, val_range=None):
169
+ super(SSIM, self).__init__()
170
+ self.window_size = window_size
171
+ self.size_average = size_average
172
+ self.val_range = val_range
173
+
174
+ # Assume 3 channel for SSIM
175
+ self.channel = 3
176
+ self.window = create_window(window_size, channel=self.channel)
177
+
178
+ def forward(self, img1, img2):
179
+ (_, channel, _, _) = img1.size()
180
+
181
+ if channel == self.channel and self.window.dtype == img1.dtype:
182
+ window = self.window
183
+ else:
184
+ window = create_window(self.window_size, channel).to(img1.device).type(img1.dtype)
185
+ self.window = window
186
+ self.channel = channel
187
+
188
+ _ssim = ssim(img1, img2, window=window, window_size=self.window_size, size_average=self.size_average)
189
+ dssim = (1 - _ssim) / 2
190
+ return dssim
191
+
192
+ class MSSSIM(torch.nn.Module):
193
+ def __init__(self, window_size=11, size_average=True, channel=3):
194
+ super(MSSSIM, self).__init__()
195
+ self.window_size = window_size
196
+ self.size_average = size_average
197
+ self.channel = channel
198
+
199
+ def forward(self, img1, img2):
200
+ return msssim(img1, img2, window_size=self.window_size, size_average=self.size_average)
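A minimal sketch of how the interpolation scripts in this commit use ssim_matlab: both frames are downscaled to 32x32 and the score gates the interpolation (below 0.2 is treated as a scene change, above 0.996 as a near-duplicate frame). The tensors here are random stand-ins:

import torch
import torch.nn.functional as F
from model.pytorch_msssim import ssim_matlab

I0 = torch.rand(1, 3, 256, 448)   # consecutive frames with values in [0, 1]
I1 = torch.rand(1, 3, 256, 448)

I0_small = F.interpolate(I0, (32, 32), mode='bilinear', align_corners=False)
I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False)
score = ssim_matlab(I0_small[:, :3], I1_small[:, :3])

if score < 0.2:
    print("likely scene change: do not interpolate across it")
elif score > 0.996:
    print("near-duplicate frames: handled as a static segment")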
model/pytorch_msssim/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (10.3 kB).
 
model/warplayer.py ADDED
@@ -0,0 +1,22 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
5
+ backwarp_tenGrid = {}
6
+
7
+
8
+ def warp(tenInput, tenFlow):
9
+ k = (str(tenFlow.device), str(tenFlow.size()))
10
+ if k not in backwarp_tenGrid:
11
+ tenHorizontal = torch.linspace(-1.0, 1.0, tenFlow.shape[3], device=device).view(
12
+ 1, 1, 1, tenFlow.shape[3]).expand(tenFlow.shape[0], -1, tenFlow.shape[2], -1)
13
+ tenVertical = torch.linspace(-1.0, 1.0, tenFlow.shape[2], device=device).view(
14
+ 1, 1, tenFlow.shape[2], 1).expand(tenFlow.shape[0], -1, -1, tenFlow.shape[3])
15
+ backwarp_tenGrid[k] = torch.cat(
16
+ [tenHorizontal, tenVertical], 1).to(device)
17
+
18
+ tenFlow = torch.cat([tenFlow[:, 0:1, :, :] / ((tenInput.shape[3] - 1.0) / 2.0),
19
+ tenFlow[:, 1:2, :, :] / ((tenInput.shape[2] - 1.0) / 2.0)], 1)
20
+
21
+ g = (backwarp_tenGrid[k] + tenFlow).permute(0, 2, 3, 1)
22
+ return torch.nn.functional.grid_sample(input=tenInput, grid=g, mode='bilinear', padding_mode='border', align_corners=True)
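A minimal usage sketch of the backward-warping helper above, assuming the same repository layout; a zero flow field should return the input essentially unchanged:

import torch
from model.warplayer import warp, device

img = torch.rand(1, 3, 64, 64).to(device)
flow = torch.zeros(1, 2, 64, 64).to(device)  # per-pixel (dx, dy) displacement

out = warp(img, flow)           # backward-warp img along flow
print((out - img).abs().max())  # close to 0 for zero flow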
requirements.txt ADDED
@@ -0,0 +1,7 @@
1
+ numpy>=1.16, <=1.23.5
2
+ tqdm>=4.35.0
3
+ sk-video>=1.1.10
4
+ torch>=1.3.0
5
+ opencv-python>=4.1.2
6
+ moviepy>=1.0.3
7
+ torchvision>=0.7.0
video_processing.py ADDED
@@ -0,0 +1,235 @@
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+ from torch.nn import functional as F
7
+ import warnings
8
+ import _thread
9
+ import skvideo.io
10
+ from queue import Queue, Empty
11
+ from model.pytorch_msssim import ssim_matlab
12
+
13
+ warnings.filterwarnings("ignore")
14
+
15
+ def transferAudio(sourceVideo, targetVideo):
16
+ import shutil
17
+ import moviepy.editor
18
+ tempAudioFileName = "./temp/audio.mkv"
19
+
20
+ # split audio from original video file and store in "temp" directory
21
+ if True:
22
+
23
+ # clear the old "temp" directory if it exists
24
+ if os.path.isdir("temp"):
25
+ # remove temp directory
26
+ shutil.rmtree("temp")
27
+ # create new "temp" directory
28
+ os.makedirs("temp")
29
+ # extract audio from video
30
+ os.system('ffmpeg -y -i "{}" -c:a copy -vn {}'.format(sourceVideo, tempAudioFileName))
31
+
32
+ targetNoAudio = os.path.splitext(targetVideo)[0] + "_noaudio" + os.path.splitext(targetVideo)[1]
33
+ os.rename(targetVideo, targetNoAudio)
34
+ # combine audio file and new video file
35
+ os.system('ffmpeg -y -i "{}" -i {} -c copy "{}"'.format(targetNoAudio, tempAudioFileName, targetVideo))
36
+
37
+ if os.path.getsize(targetVideo) == 0: # if ffmpeg failed to merge the video and audio, try converting the audio to AAC
38
+ tempAudioFileName = "./temp/audio.m4a"
39
+ os.system('ffmpeg -y -i "{}" -c:a aac -b:a 160k -vn {}'.format(sourceVideo, tempAudioFileName))
40
+ os.system('ffmpeg -y -i "{}" -i {} -c copy "{}"'.format(targetNoAudio, tempAudioFileName, targetVideo))
41
+ if (os.path.getsize(targetVideo) == 0): # if AAC is not supported by the selected format
42
+ os.rename(targetNoAudio, targetVideo)
43
+ print("Audio transfer failed. Interpolated video will have no audio")
44
+ else:
45
+ print("Lossless audio transfer failed. Audio was transcoded to AAC (M4A) instead.")
46
+
47
+ # remove audio-less video
48
+ os.remove(targetNoAudio)
49
+ else:
50
+ os.remove(targetNoAudio)
51
+
52
+ # remove temp directory
53
+ shutil.rmtree("temp")
54
+
55
+ def process_video(video, output, modelDir, fp16, UHD, scale, skip, fps, png, ext, exp, multi):
56
+ if exp != 1:
57
+ multi = (2 ** exp)
58
+ assert video is not None
59
+ if skip:
60
+ print("skip flag is abandoned, please refer to issue #207.")
61
+ if UHD and scale==1.0:
62
+ scale = 0.5
63
+ assert scale in [0.25, 0.5, 1.0, 2.0, 4.0]
64
+
65
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
66
+ torch.set_grad_enabled(False)
67
+ if torch.cuda.is_available():
68
+ torch.backends.cudnn.enabled = True
69
+ torch.backends.cudnn.benchmark = True
70
+ if(fp16):
71
+ torch.set_default_tensor_type(torch.cuda.HalfTensor)
72
+
73
+ from rife.train_log.RIFE_HDv3 import Model
74
+ model = Model()
75
+ if not hasattr(model, 'version'):
76
+ model.version = 0
77
+ model.load_model(modelDir, -1)
78
+ print("Loaded 3.x/4.x HD model.")
79
+ model.eval()
80
+ model.device()
81
+
82
+ videoCapture = cv2.VideoCapture(video)
83
+ fps_in = videoCapture.get(cv2.CAP_PROP_FPS)
84
+ tot_frame = videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)
85
+ videoCapture.release()
86
+ if fps is None:
87
+ fpsNotAssigned = True
88
+ fps_out = fps_in * multi
89
+ else:
90
+ fpsNotAssigned = False
91
+ fps_out = fps
92
+ videogen = skvideo.io.vreader(video)
93
+ lastframe = next(videogen)
94
+ fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
95
+ video_path_wo_ext, video_ext = os.path.splitext(video)
96
+ print('{}.{}, {} frames in total, {}FPS to {}FPS'.format(video_path_wo_ext, ext, tot_frame, fps_in, fps_out))
97
+ if not png and fpsNotAssigned:
98
+ print("The audio will be merged after interpolation process")
99
+ else:
100
+ print("Will not merge audio because using png or fps flag!")
101
+
102
+ h, w, _ = lastframe.shape
103
+ vid_out_name = None
104
+ vid_out = None
105
+ if png:
106
+ if not os.path.exists('vid_out'):
107
+ os.mkdir('vid_out')
108
+ else:
109
+ if output is not None:
110
+ vid_out_name = output
111
+ else:
112
+ vid_out_name = '{}_{}X_{}fps.{}'.format(video_path_wo_ext, multi, int(np.round(fps_out)), ext)
113
+ vid_out = cv2.VideoWriter(vid_out_name, fourcc, fps_out, (w, h))
114
+
115
+ def clear_write_buffer(user_args, write_buffer):
116
+ cnt = 0
117
+ while True:
118
+ item = write_buffer.get()
119
+ if item is None:
120
+ break
121
+ if png: # user_args is passed an empty tuple here, so use the enclosing png flag instead
122
+ cv2.imwrite('vid_out/{:0>7d}.png'.format(cnt), item[:, :, ::-1])
123
+ cnt += 1
124
+ else:
125
+ vid_out.write(item[:, :, ::-1])
126
+
127
+ def build_read_buffer(user_args, read_buffer, videogen):
128
+ try:
129
+ for frame in videogen:
130
+ read_buffer.put(frame)
131
+ except:
132
+ pass
133
+ read_buffer.put(None)
134
+
135
+ def make_inference(I0, I1, n):
136
+ if model.version >= 3.9:
137
+ res = []
138
+ for i in range(n):
139
+ res.append(model.inference(I0, I1, (i+1) * 1. / (n+1), scale))
140
+ return res
141
+ else:
142
+ middle = model.inference(I0, I1, scale)
143
+ if n == 1:
144
+ return [middle]
145
+ first_half = make_inference(I0, middle, n=n//2)
146
+ second_half = make_inference(middle, I1, n=n//2)
147
+ if n%2:
148
+ return [*first_half, middle, *second_half]
149
+ else:
150
+ return [*first_half, *second_half]
151
+
152
+ def pad_image(img):
153
+ if(fp16):
154
+ return F.pad(img, padding).half()
155
+ else:
156
+ return F.pad(img, padding)
157
+
158
+ tmp = max(128, int(128 / scale))
159
+ ph = ((h - 1) // tmp + 1) * tmp
160
+ pw = ((w - 1) // tmp + 1) * tmp
161
+ padding = (0, pw - w, 0, ph - h)
162
+ pbar = tqdm(total=tot_frame)
163
+ write_buffer = Queue(maxsize=500)
164
+ read_buffer = Queue(maxsize=500)
165
+ _thread.start_new_thread(build_read_buffer, ((), read_buffer, videogen))
166
+ _thread.start_new_thread(clear_write_buffer, ((), write_buffer))
167
+
168
+ I1 = torch.from_numpy(np.transpose(lastframe, (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.
169
+ I1 = pad_image(I1)
170
+ temp = None # save lastframe when processing static frame
171
+
172
+ while True:
173
+ if temp is not None:
174
+ frame = temp
175
+ temp = None
176
+ else:
177
+ frame = read_buffer.get()
178
+ if frame is None:
179
+ break
180
+ I0 = I1
181
+ I1 = torch.from_numpy(np.transpose(frame, (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.
182
+ I1 = pad_image(I1)
183
+ I0_small = F.interpolate(I0, (32, 32), mode='bilinear', align_corners=False)
184
+ I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False)
185
+ ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
186
+
187
+ break_flag = False
188
+ if ssim > 0.996:
189
+ frame = read_buffer.get() # read a new frame
190
+ if frame is None:
191
+ break_flag = True
192
+ frame = lastframe
193
+ else:
194
+ temp = frame
195
+ I1 = torch.from_numpy(np.transpose(frame, (2,0,1))).to(device, non_blocking=True).unsqueeze(0).float() / 255.
196
+ I1 = pad_image(I1)
197
+ I1 = model.inference(I0, I1, scale=scale)
198
+ I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False)
199
+ ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
200
+ frame = (I1[0] * 255).byte().cpu().numpy().transpose(1, 2, 0)[:h, :w]
201
+
202
+ if ssim < 0.2:
203
+ output = []
204
+ for i in range(multi - 1):
205
+ output.append(I0)
206
+ else:
207
+ output = make_inference(I0, I1, multi - 1)
208
+
209
+ write_buffer.put(lastframe)
210
+ for mid in output:
211
+ mid = (((mid[0] * 255.).byte().cpu().numpy().transpose(1, 2, 0)))
212
+ write_buffer.put(mid[:h, :w])
213
+ pbar.update(1)
214
+ lastframe = frame
215
+ if break_flag:
216
+ break
217
+
218
+ write_buffer.put(lastframe)
219
+ write_buffer.put(None)
220
+
221
+ import time
222
+ while(not write_buffer.empty()):
223
+ time.sleep(0.1)
224
+ pbar.close()
225
+ if vid_out is not None:
226
+ vid_out.release()
227
+
228
+ if not png and fpsNotAssigned and video is not None:
229
+ try:
230
+ transferAudio(video, vid_out_name)
231
+ except:
232
+ print("Audio transfer failed. Interpolated video will have no audio")
233
+ targetNoAudio = os.path.splitext(vid_out_name)[0] + "_noaudio" + os.path.splitext(vid_out_name)[1]
234
+ os.rename(targetNoAudio, vid_out_name)
235
+ return vid_out_name
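A hypothetical invocation of process_video above; the paths and values are placeholders, and it assumes the rife package imported inside the function is available alongside the downloaded model weights:

from video_processing import process_video

out_path = process_video(
    video='input.mp4',     # source clip (placeholder path)
    output=None,           # None -> auto-named '<video>_<multi>X_<fps>fps.<ext>'
    modelDir='train_log',  # directory containing the RIFE weights
    fp16=False,
    UHD=False,
    scale=1.0,
    skip=False,
    fps=None,              # None keeps fps_in * multi and allows the audio merge
    png=False,
    ext='mp4',
    exp=1,                 # with exp == 1, the multi argument is used as-is
    multi=2,               # 2x interpolation
)
print(out_path)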