# AnisoraV3 / OSS / OSS.py
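"""Optimal stepsize search (OSS) for flow-matching samplers.

The search functions below use dynamic programming to pick a short "student"
step schedule (a subset of a long "teacher" schedule) whose sampling
trajectory stays as close as possible to the teacher trajectory;
``infer_OSS`` then samples along the searched schedule. The model is expected
to expose ``model.fm_steps`` (teacher timestep values, with
``fm_steps[0] == 0``) and to be callable as
``model(z, t, class_emb, model_kwargs)``, returning a velocity field;
``teacher_steps`` must not exceed ``len(model.fm_steps) - 1``.
"""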
import torch
import numpy as np
import logging
from .utils import _broadcast_tensor

def cal_medium(oss_steps_all):
    """Return the element-wise median schedule across several searched step lists.

    Each output entry is the integer mean of the two middle values at that
    position (the exact median when the number of schedules is even).
    """
    ave_steps = []
    for k in range(len(oss_steps_all[0])):
        vals = sorted(steps[k] for steps in oss_steps_all)
        ave_steps.append((vals[len(vals) // 2] + vals[len(vals) // 2 - 1]) // 2)
    return ave_steps
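
# A minimal usage sketch for cal_medium (the step values below are made up);
# with an even number of searched schedules each entry is an exact median:
#
#   oss_steps_all = [[3, 40, 90, 150, 200],
#                    [5, 38, 94, 146, 200]]
#   cal_medium(oss_steps_all)  # -> [4, 39, 92, 148, 200]
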
@torch.no_grad()
def infer_OSS(oss_steps, model, z, class_emb, device, renorm_flag=False, max_amp=None, min_amp=None, float32=True, model_kwargs=None):
    """Sample by integrating the flow along the searched step schedule ``oss_steps``."""
    # z: [B, C, H, W] (or [B, C, T, H, W] for video latents)
N = len(oss_steps)
B = z.shape[0]
oss_steps = [0] + oss_steps
ori_z = z.clone()
    # model.fm_steps holds the teacher timestep values; fm_steps[0] == 0.
    timesteps = model.fm_steps
if model_kwargs is None:
model_kwargs = {}
for i in reversed(range(1,N+1)):
logging.info(f"steps {i}")
t = torch.ones((B,), device=device, dtype=torch.long) * oss_steps[i]
        if renorm_flag:
            # Per-sample affine renormalization: map the 5th/95th percentiles of z
            # onto the amplitude range recorded for this teacher step.
            max_s = torch.quantile(z.reshape((z.shape[0], -1)), 0.95, dim=1)
            min_s = torch.quantile(z.reshape((z.shape[0], -1)), 0.05, dim=1)
            max_amp_tmp = max_amp[oss_steps[i] - 1]
            min_amp_tmp = min_amp[oss_steps[i] - 1]
            ak = (max_amp_tmp - min_amp_tmp) / (max_s - min_s)
            ab = min_amp_tmp - ak * min_s
            z = _broadcast_tensor(ak, z.shape) * z + _broadcast_tensor(ab, z.shape)
        vt = model(z, t, class_emb, model_kwargs)
        if float32:
            z = z.to(torch.float32)
        # Euler step of the flow ODE from timestep oss_steps[i] down to oss_steps[i-1].
        z = z + (timesteps[oss_steps[i - 1]] - timesteps[oss_steps[i]]) * vt
        if float32:
            z = z.to(ori_z.dtype)
return z
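
# A minimal end-to-end sketch (hypothetical stand-ins: `model`, `emb`, and a
# noise tensor `noise`; `model` must follow the calling convention above):
#
#   per_sample = search_OSS(model, noise, noise.shape[0], emb, device,
#                           teacher_steps=200, student_steps=5)
#   sample = infer_OSS(per_sample[0], model, noise, emb, device)
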
@torch.no_grad()
def search_OSS_video(model, z, batch_size, class_emb, device, teacher_steps=200, student_steps=5, norm=2, model_kwargs=None, frame_type="6", channel_type="4", random_channel=False, float32=True):
    """Search a ``student_steps``-long schedule for video latents via dynamic programming.

    The student-vs-teacher cost is evaluated on a subset of channels
    (``channel_type``) and frames (``frame_type``) to save memory.
    """
    # z: [B, C, T, H, W]
    # model_kwargs does not contain the class embedding, which is a separate input;
    # the class embedding is shared by all searched samples here.
B = batch_size
N = teacher_steps
STEP = student_steps
assert z.shape[0] == B
channel_size = z.shape[1]
frame_size = z.shape[2]
    # model.fm_steps holds the teacher timestep values; fm_steps[0] == 0.
timesteps = model.fm_steps
if model_kwargs is None:
model_kwargs = {}
    # Compute the teacher trajectory: traj_tea[i] is the teacher state at
    # timestep i, so traj_tea[0] is the fully denoised teacher sample.
    traj_tea = torch.stack([torch.ones_like(z)] * (N + 1), dim=0)
    traj_tea[N] = z
    z_tea = z.clone()
for i in reversed(range(1,N+1)):
print("teachering,%s"%i)
t = torch.ones((B,), device=device, dtype=torch.long) * i
vt = model(z_tea, t, class_emb, model_kwargs)
if float32:
z_tea = z_tea.to(torch.float32)
z_tea = z_tea + vt * (timesteps[i-1] - timesteps[i])
if float32:
z_tea = z_tea.to(z.dtype)
traj_tea[i-1] = z_tea.clone()
    # Dynamic-programming search for the best student step schedule
all_steps = []
for i_batch in range(B): # process each image separately
z_cur = z[i_batch].clone().unsqueeze(0)
if class_emb is not None:
class_emb_cur = class_emb[i_batch].clone().unsqueeze(0)
else:
class_emb_cur = None
        # dp[k][j]: best cost of reaching teacher timestep j after k student steps;
        # tracestep[k][j]: the source timestep used to reach j at stage k.
        tracestep = [torch.ones(N + 1, device=device, dtype=torch.long) * N for _ in range(STEP + 1)]
        dp = torch.ones((STEP + 1, N + 1), device=device) * torch.inf
z_prev = torch.cat([z_cur]*(N+1),dim=0)
z_next = z_prev.clone()
for k in range(STEP):
print("studenting,%s" % k)
logging.info(f"Doing k step solving {k}")
            if random_channel and channel_type != "all":
                # Score the cost on a random channel subset, fixed for this DP pass.
                channel_select = np.random.choice(range(channel_size), size=int(channel_type), replace=False)
            for i in reversed(range(1, N + 1)):
                z_i = z_prev[i].unsqueeze(0)
                t = torch.ones((1,), device=device, dtype=torch.long) * i
                # One model call per source timestep i; vt is reused for every target j below.
                vt = model(z_i, t, class_emb_cur, model_kwargs)
for j in reversed(range(0,i)):
if float32:
z_i = z_i.to(torch.float32)
z_nxt = z_i + vt * (timesteps[j] - timesteps[i])
if float32:
z_nxt = z_nxt.to(z.dtype)
if random_channel:
pass
elif channel_type == "all":
channel_select = list(range(channel_size))
else:
channel_select = list(range(int(channel_type)))
if frame_type == "all":
frame_select = list(range(frame_size))
else:
frame_select = torch.linspace(0,frame_size-1,int(frame_type),dtype=torch.long).tolist()
channel_select_tensor = torch.tensor(channel_select, dtype=torch.long, device=device)
frame_select_tensor = torch.tensor(frame_select, dtype=torch.long, device=device)
z_nxt_select = z_nxt[0, channel_select_tensor,...]
traj_tea_select = traj_tea[j, i_batch, channel_select_tensor, ...]
z_nxt_select = z_nxt_select[:,frame_select_tensor,... ]
traj_tea_select = traj_tea_select[:,frame_select_tensor,... ]
                    # Distance to the teacher state at j, on the selected channels/frames.
                    cost = (torch.abs(z_nxt_select - traj_tea_select)) ** norm
                    cost = cost.mean()
if cost < dp[k][j]:
dp[k][j] = cost
tracestep[k][j] = i
z_next[j] = z_nxt
dp[k+1] = dp[k].clone()
tracestep[k+1] = tracestep[k].clone()
z_prev = z_next.clone()
logging.info(f"finish {k} steps")
            # Log the best partial schedule found after this pass.
            cur_step = [0]
            for kk in reversed(range(k + 1)):
                j = cur_step[-1]
                cur_step.append(int(tracestep[kk][j].item()))
            logging.info(cur_step)
# trace back
final_step = [0]
for k in reversed(range(STEP)):
j = final_step[-1]
final_step.append(int(tracestep[k][j].item()))
logging.info(final_step)
        all_steps.append(final_step[1:])
    # Only the first sample's schedule is returned; aggregate the collected
    # schedules with cal_medium if a consensus across samples is wanted.
    return all_steps[0]
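
# A minimal sketch for the video path (hypothetical stand-ins: `model`, `emb`,
# and `noise` of shape [B, C, T, H, W]). With the defaults above, the DP cost
# is scored on 4 channels and 6 uniformly spaced frames:
#
#   steps = search_OSS_video(model, noise, noise.shape[0], emb, device,
#                            teacher_steps=200, student_steps=5,
#                            frame_type="6", channel_type="4")
#   sample = infer_OSS(steps, model, noise, emb, device)
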
@torch.no_grad()
def search_OSS(model, z, batch_size, class_emb, device, teacher_steps=200, student_steps=5, model_kwargs=None):
    """Search a ``student_steps``-long schedule per sample; returns one schedule per batch element."""
    # z: [B, C, H, W]
    # model_kwargs does not contain the class embedding, which is a separate input;
    # the class embedding is shared by all searched samples here.
B = batch_size
N = teacher_steps
STEP = student_steps
assert z.shape[0] == B
    # model.fm_steps holds the teacher timestep values; fm_steps[0] == 0.
timesteps = model.fm_steps
if model_kwargs is None:
model_kwargs = {}
# Compute the teacher trajectory
    traj_tea = torch.stack([torch.ones_like(z)] * (N + 1), dim=0)
traj_tea[N] = z
z_tea = z.clone()
for i in reversed(range(1,N+1)):
t = torch.ones((B,), device=device, dtype=torch.long) * i
vt = model(z_tea, t, class_emb, model_kwargs)
z_tea = z_tea + vt * (timesteps[i-1] - timesteps[i])
traj_tea[i-1] = z_tea.clone()
    # Dynamic-programming search for the best student step schedule
all_steps = []
for i_batch in range(B): # process each image separately
z_cur = z[i_batch].clone().unsqueeze(0)
if class_emb is not None:
class_emb_cur = class_emb[i_batch].clone().unsqueeze(0)
else:
class_emb_cur = None
tracestep = [torch.ones(N+1, device=device, dtype=torch.long)*N for _ in range(STEP+1)]
dp = torch.ones((STEP+1,N+1), device=device)*torch.inf
z_prev = torch.cat([z_cur]*(N+1),dim=0)
z_next = z_prev.clone()
for k in range(STEP):
logging.info(f"Doing k step solving {k}")
for i in reversed(range(1,N+1)):
z_i = z_prev[i].unsqueeze(0)
t = torch.ones((1,), device=device, dtype=torch.long) * i
vt = model(z_i, t, class_emb_cur, model_kwargs)
                for j in reversed(range(0, i)):
                    # Candidate Euler step from timestep i down to j, scored against the teacher state.
                    z_j = z_i + vt * (timesteps[j] - timesteps[i])
                    cost = (z_j - traj_tea[j, i_batch]) ** 2
cost = cost.mean()
if cost < dp[k][j]:
dp[k][j] = cost
tracestep[k][j] = i
z_next[j] = z_j
dp[k+1] = dp[k].clone()
tracestep[k+1] = tracestep[k].clone()
z_prev = z_next.clone()
# trace back
final_step = [0]
for k in reversed(range(STEP)):
j = final_step[-1]
final_step.append(int(tracestep[k][j].item()))
logging.info(final_step)
all_steps.append(final_step[1:])
return all_steps
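
# search_OSS returns one schedule per batch element; cal_medium can reduce
# them to a single consensus schedule. Sketch (same hypothetical `model`,
# `noise`, `emb` as above):
#
#   per_sample = search_OSS(model, noise, noise.shape[0], emb, device)
#   shared = cal_medium(per_sample)
#   sample = infer_OSS(shared, model, noise, emb, device)
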
@torch.no_grad()
def search_OSS_batch(model, z, batch_size, class_emb, device, teacher_steps=200, student_steps=5, model_kwargs=None):
    """Batched variant of ``search_OSS``: one model call per DP pass covers all N source timesteps."""
    # z: [B, C, H, W]
    # model_kwargs does not contain the class embedding, which is a separate input;
    # the class embedding is shared by all searched samples here.
B = batch_size
N = teacher_steps
STEP = student_steps
    # model.fm_steps holds the teacher timestep values; fm_steps[0] == 0.
timesteps = model.fm_steps
if model_kwargs is None:
model_kwargs = {}
# Compute the teacher trajectory
    traj_tea = torch.stack([torch.ones_like(z)] * (N + 1), dim=0)
traj_tea[N] = z
z_tea = z.clone()
for i in reversed(range(1,N+1)):
t = torch.ones((B,), device=device, dtype=torch.long) * i
vt = model(z_tea, t, class_emb, model_kwargs)
z_tea = z_tea + (timesteps[i-1] - timesteps[i]) * vt
traj_tea[i-1] = z_tea.clone()
    times = torch.arange(N + 1, device=device)  # [0, 1, ..., N]
    t_in = torch.arange(1, N + 1, device=device)  # [1, 2, ..., N]
    # Dynamic-programming search for the best student step schedule
all_steps = []
for i_batch in range(B): # process each image separately
z_cur = z[i_batch].clone().unsqueeze(0)
        if class_emb is not None:
            # Broadcast this sample's embedding across the N batched model inputs below.
            class_emb_cur = class_emb[i_batch:i_batch + 1].expand(N, *class_emb.shape[1:])
        else:
            class_emb_cur = None
tracestep = [torch.ones(N+1, device=device, dtype=torch.long)*N for _ in range(STEP+1)]
dp = torch.ones((STEP+1,N+1), device=device)*torch.inf
z_prev = torch.cat([z_cur]*(N+1),dim=0)
z_next = z_prev.clone()
for k in range(STEP):
logging.info(f"Doing k step solving {k}")
            # One batched model call evaluates every source timestep i = 1..N at once.
            vt = model(z_prev[1:], t_in, class_emb_cur, model_kwargs)
            for i in reversed(range(1, N + 1)):
                t = torch.ones((i,), device=device, dtype=torch.long) * i
                # Broadcast state i to all candidate targets j < i; one Euler step per target.
                z_i_batch = torch.stack([z_prev[i]] * i, dim=0)
                dt = timesteps[times[:i]] - timesteps[t]
                z_j_batch = z_i_batch + _broadcast_tensor(dt, z_i_batch.shape) * vt[i - 1].unsqueeze(0)
cost = (z_j_batch - traj_tea[:i,i_batch,...])**2
                cost = cost.mean(dim=tuple(range(1, len(cost.shape))))
                # Keep, in parallel, the better predecessor for every target timestep j.
                mask = cost < dp[k, :i]
dp[k, :i] = torch.where(mask, cost, dp[k, :i])
tracestep[k][:i] = torch.where(mask, i, tracestep[k][:i])
expanded_mask = _broadcast_tensor(mask,z_i_batch.shape)
z_next[:i] = torch.where(expanded_mask, z_j_batch, z_next[:i])
dp[k+1] = dp[k].clone()
tracestep[k+1] = tracestep[k].clone()
z_prev = z_next.clone()
# trace back
final_step = [0]
for k in reversed(range(STEP)):
j = final_step[-1]
final_step.append(int(tracestep[k][j].item()))
logging.info(final_step)
all_steps.append(final_step[1:])
return all_steps
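
# search_OSS_batch trades memory for speed: each DP pass feeds all N teacher
# states through the model in one call instead of N separate calls, so it
# should produce the same schedules as search_OSS up to floating-point
# ordering. Sketch (hypothetical `model`, `noise`, `emb`):
#
#   per_sample = search_OSS_batch(model, noise, noise.shape[0], emb, device,
#                                 teacher_steps=200, student_steps=5)
#   sample = infer_OSS(cal_medium(per_sample), model, noise, emb, device)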