import math
import random

import torch
import torch.nn as nn

from .base import LycorisBaseModule
from ..utils import product


class DyLoraModule(LycorisBaseModule):
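    """DyLoRA-style dynamic-rank LoRA module.

    The full rank ``lora_dim`` is split into ``block_count`` blocks of
    ``block_size``. During training a random number of blocks is used and only
    the newest block receives gradients (earlier blocks are detached), so the
    resulting adapter can be truncated to any multiple of ``block_size``.
    """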
    support_module = {
        "linear",
        "conv1d",
        "conv2d",
        "conv3d",
    }

    def __init__(
        self,
        lora_name,
        org_module: nn.Module,
        multiplier=1.0,
        lora_dim=4,
        alpha=1,
        dropout=0.0,
        rank_dropout=0.0,
        module_dropout=0.0,
        use_tucker=False,
        block_size=4,
        use_scalar=False,
        rank_dropout_scale=False,
        weight_decompose=False,
        bypass_mode=None,
        rs_lora=False,
        train_on_input=False,
        **kwargs,
    ):
        """If alpha == 0 or None, alpha is rank (no scaling)."""
        super().__init__(
            lora_name,
            org_module,
            multiplier,
            dropout,
            rank_dropout,
            module_dropout,
            rank_dropout_scale,
            bypass_mode,
        )
        if self.module_type not in self.support_module:
            raise ValueError(f"{self.module_type} is not supported in DyLoRA algo.")
        assert lora_dim % block_size == 0, "lora_dim must be a multiple of block_size"
        self.block_count = lora_dim // block_size
        self.block_size = block_size

        # 2D view of the original weight shape: (out_features, everything else).
        shape = (
            self.shape[0],
            product(self.shape[1:]),
        )

        self.lora_dim = lora_dim
        # One (up, down) pair per rank block; concatenating the first k pairs
        # gives a rank k * block_size adapter.
        self.up_list = nn.ParameterList(
            [
                nn.Parameter(torch.empty(shape[0], self.block_size))
                for _ in range(self.block_count)
            ]
        )
        self.down_list = nn.ParameterList(
            [
                nn.Parameter(torch.empty(self.block_size, shape[1]))
                for _ in range(self.block_count)
            ]
        )

        if isinstance(alpha, torch.Tensor):
            alpha = alpha.detach().float().numpy()
        alpha = lora_dim if alpha is None or alpha == 0 else alpha
        self.scale = alpha / self.lora_dim
        self.register_buffer("alpha", torch.tensor(alpha))

        # Standard LoRA init: random down projections, zeroed up projections,
        # so the module initially contributes no change to the output.
        for v in self.down_list:
            torch.nn.init.kaiming_uniform_(v, a=math.sqrt(5))
        for v in self.up_list:
            torch.nn.init.zeros_(v)

    def load_state_dict(self, state_dict, strict: bool = True, assign: bool = False):
        # Deliberate no-op: this module does not restore weights through the
        # default nn.Module loading path.
        return

    def custom_state_dict(self):
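        """Export the blocks as a single rank-``lora_dim`` pair under the
        conventional ``lora_up.weight`` / ``lora_down.weight`` keys."""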
        destination = {}
        destination["alpha"] = self.alpha
        destination["lora_up.weight"] = nn.Parameter(
            torch.concat(list(self.up_list), dim=1)
        )
        destination["lora_down.weight"] = nn.Parameter(
            torch.concat(list(self.down_list)).reshape(
                self.lora_dim, -1, *self.shape[2:]
            )
        )
        return destination

    def get_weight(self, rank):
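        """Return ``(down, up, scale)`` truncated to ``rank``.

        Blocks below the active one are detached via ``.data``, so only the
        newest block is updated when training at this rank.
        """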
        b = math.ceil(rank / self.block_size)
        down = torch.concat(
            list(i.data for i in self.down_list[:b]) + list(self.down_list[b : b + 1])
        )
        up = torch.concat(
            list(i.data for i in self.up_list[:b]) + list(self.up_list[b : b + 1]),
            dim=1,
        )
        return down, up, self.alpha / (b + 1)

    def get_random_rank_weight(self):
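        """Pick a random block count and return the corresponding truncated weights."""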
        b = random.randint(0, self.block_count - 1)
        return self.get_weight(b * self.block_size)

    def get_diff_weight(self, multiplier=1, shape=None, device=None, rank=None):
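        """Compute the scaled weight delta ``up @ down`` at the given rank.

        When ``rank`` is None, a random rank is sampled via
        ``get_random_rank_weight``.
        """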
        if rank is None:
            down, up, scale = self.get_random_rank_weight()
        else:
            down, up, scale = self.get_weight(rank)
        w = up @ (down * (scale * multiplier))
        if device is not None:
            w = w.to(device)
        if shape is not None:
            w = w.view(shape)
        else:
            w = w.view(self.shape)
        return w, None

    def get_merged_weight(self, multiplier=1, shape=None, device=None, rank=None):
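        """Return the original weight with the DyLoRA delta added."""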
        diff, _ = self.get_diff_weight(multiplier, shape, device, rank)
        return diff + self.org_weight, None

    def bypass_forward_diff(self, x, scale=1, rank=None):
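        """Low-rank delta computed on the activations (bypass mode): apply the
        down projection, then the up projection, then the scaling."""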
        if rank is None:
            down, up, gamma = self.get_random_rank_weight()
        else:
            down, up, gamma = self.get_weight(rank)
        # Reshape for self.op using the active rank (down.size(0)), which may be
        # smaller than the full lora_dim.
        down = down.view(down.size(0), -1, *self.shape[2:])
        up = up.view(-1, up.size(1), *(1 for _ in self.shape[2:]))
        # Combine the caller's scale with the per-rank alpha scaling (gamma).
        scale = scale * gamma
        return self.op(self.op(x, down, **self.kw_dict), up) * scale

    def bypass_forward(self, x, scale=1, rank=None):
        return self.org_forward(x) + self.bypass_forward_diff(x, scale, rank)

def forward(self, x, *args, **kwargs): |
|
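        """Apply module dropout, then either the bypass path or a forward pass
        with the merged weight and the original module's bias."""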
        if self.module_dropout and self.training:
            if torch.rand(1) < self.module_dropout:
                return self.org_forward(x)
        if self.bypass_mode:
            return self.bypass_forward(x, self.multiplier)
        else:
            weight = self.get_merged_weight(multiplier=self.multiplier)[0]
            bias = (
                None
                if self.org_module[0].bias is None
                else self.org_module[0].bias.data
            )
            return self.op(x, weight, bias, **self.kw_dict)