Spaces:

LanguageBind
/

Open-Sora-Plan-v1.0.0

Runtime error

Open-Sora-Plan-v1.0.0 / opensora /eval /cal_ssim.py

LinB203

a220803 over 1 year ago

3.51 kB

	import numpy as np
	import torch
	from tqdm import tqdm
	import cv2

	def ssim(img1, img2):
	C1 = 0.01 ** 2
	C2 = 0.03 ** 2
	img1 = img1.astype(np.float64)
	img2 = img2.astype(np.float64)
	kernel = cv2.getGaussianKernel(11, 1.5)
	window = np.outer(kernel, kernel.transpose())
	mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5] # valid
	mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
	mu1_sq = mu1 ** 2
	mu2_sq = mu2 ** 2
	mu1_mu2 = mu1 * mu2
	sigma1_sq = cv2.filter2D(img1 ** 2, -1, window)[5:-5, 5:-5] - mu1_sq
	sigma2_sq = cv2.filter2D(img2 ** 2, -1, window)[5:-5, 5:-5] - mu2_sq
	sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
	ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
	(sigma1_sq + sigma2_sq + C2))
	return ssim_map.mean()


	def calculate_ssim_function(img1, img2):
	# [0,1]
	# ssim is the only metric extremely sensitive to gray being compared to b/w
	if not img1.shape == img2.shape:
	raise ValueError('Input images must have the same dimensions.')
	if img1.ndim == 2:
	return ssim(img1, img2)
	elif img1.ndim == 3:
	if img1.shape[0] == 3:
	ssims = []
	for i in range(3):
	ssims.append(ssim(img1[i], img2[i]))
	return np.array(ssims).mean()
	elif img1.shape[0] == 1:
	return ssim(np.squeeze(img1), np.squeeze(img2))
	else:
	raise ValueError('Wrong input image dimensions.')

	def trans(x):
	return x

	def calculate_ssim(videos1, videos2):
	print("calculate_ssim...")

	# videos [batch_size, timestamps, channel, h, w]

	assert videos1.shape == videos2.shape

	videos1 = trans(videos1)
	videos2 = trans(videos2)

	ssim_results = []

	for video_num in tqdm(range(videos1.shape[0])):
	# get a video
	# video [timestamps, channel, h, w]
	video1 = videos1[video_num]
	video2 = videos2[video_num]

	ssim_results_of_a_video = []
	for clip_timestamp in range(len(video1)):
	# get a img
	# img [timestamps[x], channel, h, w]
	# img [channel, h, w] numpy

	img1 = video1[clip_timestamp].numpy()
	img2 = video2[clip_timestamp].numpy()

	# calculate ssim of a video
	ssim_results_of_a_video.append(calculate_ssim_function(img1, img2))

	ssim_results.append(ssim_results_of_a_video)

	ssim_results = np.array(ssim_results)

	ssim = {}
	ssim_std = {}

	for clip_timestamp in range(len(video1)):
	ssim[clip_timestamp] = np.mean(ssim_results[:,clip_timestamp])
	ssim_std[clip_timestamp] = np.std(ssim_results[:,clip_timestamp])

	result = {
	"value": ssim,
	"value_std": ssim_std,
	"video_setting": video1.shape,
	"video_setting_name": "time, channel, heigth, width",
	}

	return result

	# test code / using example

	def main():
	NUMBER_OF_VIDEOS = 8
	VIDEO_LENGTH = 50
	CHANNEL = 3
	SIZE = 64
	videos1 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False)
	videos2 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False)
	device = torch.device("cuda")

	import json
	result = calculate_ssim(videos1, videos2)
	print(json.dumps(result, indent=4))

	if __name__ == "__main__":
	main()