import lpips
import numpy as np
import torch

spatial = True  # Return a spatial map of perceptual distance.

# Linearly calibrated models (LPIPS)
loss_fn = lpips.LPIPS(net="alex", spatial=spatial)  # Can also set net = 'squeeze' or 'vgg'
# loss_fn = lpips.LPIPS(net='alex', spatial=spatial, lpips=False)  # Can also set net = 'squeeze' or 'vgg'
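
# Note (added comment, based on the lpips package's documented behavior): with
# spatial=True the forward pass returns a per-pixel distance map of shape
# [N, 1, H, W], which this script reduces with .mean(); with spatial=False it
# returns one distance per image pair, shaped [N, 1, 1, 1], so the .mean()
# below is effectively a no-op.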
def trans(x):
    # If the input is grayscale, repeat the single channel to get 3-channel RGB.
    if x.shape[-3] == 1:
        x = x.repeat(1, 1, 3, 1, 1)
    # Rescale value range [0, 1] -> [-1, 1], as LPIPS expects.
    x = x * 2 - 1
    return x
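
# Example (added for illustration): a grayscale batch [B, T, 1, H, W] in [0, 1]
# becomes an RGB batch [B, T, 3, H, W] in [-1, 1]:
#
#     x = torch.rand(2, 5, 1, 64, 64)
#     y = trans(x)
#     assert y.shape == (2, 5, 3, 64, 64) and y.min() >= -1 and y.max() <= 1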
def calculate_lpips(videos1, videos2, device):
    # Images should be RGB. IMPORTANT: values are normalized to [-1, 1] below.
    assert videos1.shape == videos2.shape

    # videos: [batch_size, timestamps, channel, h, w]
    # Grayscale input is supported: the single channel is repeated to 3.
    # Value range [0, 1] -> [-1, 1]
    videos1 = trans(videos1)
    videos2 = trans(videos2)

    # Move the model to the device once, not inside the per-frame loop.
    loss_fn.to(device)

    lpips_results = []

    for video_num in range(videos1.shape[0]):
        # Get one video: [timestamps, channel, h, w]
        video1 = videos1[video_num]
        video2 = videos2[video_num]

        lpips_results_of_a_video = []
        for clip_timestamp in range(len(video1)):
            # Get one frame, batched to [1, channel, h, w]
            img1 = video1[clip_timestamp].unsqueeze(0).to(device)
            img2 = video2[clip_timestamp].unsqueeze(0).to(device)

            # LPIPS of one frame pair (mean over the spatial map if spatial=True)
            lpips_results_of_a_video.append(loss_fn.forward(img1, img2).mean().detach().cpu().item())
        lpips_results.append(lpips_results_of_a_video)

    lpips_results = np.array(lpips_results)

    # Per-timestamp mean and std over the batch. Do not name this dict `lpips`,
    # which would shadow the imported module.
    lpips_mean = {}
    lpips_std = {}

    for clip_timestamp in range(videos1.shape[1]):
        lpips_mean[clip_timestamp] = np.mean(lpips_results[:, clip_timestamp])
        lpips_std[clip_timestamp] = np.std(lpips_results[:, clip_timestamp])

    result = {
        "value": lpips_mean,
        "value_std": lpips_std,
        "video_setting": tuple(videos1.shape[1:]),
        "video_setting_name": "time, channel, height, width",
    }

    return result
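
# Illustrative helper (not part of the original script): collapse the
# per-timestamp means from calculate_lpips into one scalar for the whole batch.
# The name `aggregate_lpips` and the unweighted average over timestamps are
# assumptions, not something the metric itself prescribes.
def aggregate_lpips(result):
    # Mean of the per-timestamp LPIPS means.
    return float(np.mean(list(result["value"].values())))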
# Test code / usage example
def main():
    NUMBER_OF_VIDEOS = 8
    VIDEO_LENGTH = 50
    CHANNEL = 3
    SIZE = 64
    videos1 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False)
    videos2 = torch.ones(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False)
    device = torch.device("cuda")
    # device = torch.device("cpu")

    import json

    result = calculate_lpips(videos1, videos2, device)
    print(json.dumps(result, indent=4))

if __name__ == "__main__":
    main()
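
# Hedged usage sketch (assumption: real frames arrive as uint8 [H, W, C] numpy
# arrays in [0, 255]; the helper name frames_to_video_tensor is hypothetical):
#
#     def frames_to_video_tensor(frames):
#         video = np.stack(frames).astype(np.float32) / 255.0  # [T, H, W, C] in [0, 1]
#         return torch.from_numpy(video).permute(0, 3, 1, 2)   # [T, C, H, W]
#
# Stack such tensors into [B, T, C, H, W] with torch.stack before calling
# calculate_lpips; the [0, 1] -> [-1, 1] rescaling is handled inside.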