Spaces:

jannisborn
/

NumberTokenLoss

Running

App Files Files Community

NumberTokenLoss / src /scenarios.py

jannisborn

wip

9914a10 unverified 24 days ago

raw

history blame

1.83 kB

	import numpy as np

	# (1) Dirac: a one-hot distribution whose single peak moves from token 0 to token 10 (“Text”)
	# Eleven scenarios, one per token index: scenario `hot` puts probability 1.0
	# on token `hot` and 0.0 on every other token. The ground truth is always "4".
	dirac = [
	    dict(
	        name=f"Dirac: all mass on token {hot}",
	        # float(True) == 1.0 and float(False) == 0.0, giving the one-hot row.
	        values=[float(tok == hot) for tok in range(11)],
	        ground_truth="4",
	        explanation="A Dirac distribution: all probability on a single token.",
	    )
	    for hot in range(11)
	]


	# (2) Gaussian-style: peak_mass=0.6 on the center token, the remaining 0.4 spread over the other tokens by a Gaussian kernel
	def make_gauss_values(center, n=11, sigma=1.5, peak_mass=0.6):
	    """Return a peaked distribution over ``n`` tokens as a list of floats.

	    ``peak_mass`` is placed on index ``center``; the remaining
	    ``1 - peak_mass`` is shared among all other indices in proportion to a
	    Gaussian kernel of width ``sigma`` centred on ``center``.

	    If the kernel leaves no weight on any other index (``n == 1``, or a
	    ``sigma`` so small that every neighbour underflows to zero), all mass
	    is put on ``center`` so the result still sums to 1 instead of
	    containing NaNs from a 0/0 normalization.
	    """
	    xs = np.arange(n)
	    # unnormalized Gaussian
	    kernel = np.exp(-0.5 * ((xs - center) / sigma) ** 2)
	    # zero out the center; only the other weights share the non-peak mass
	    others = kernel.copy()
	    others[center] = 0.0
	    total = others.sum()
	    if not total > 0.0:
	        # Nothing to normalize (also catches a NaN total): fall back to a one-hot.
	        vals = np.zeros(n)
	        vals[center] = 1.0
	        return vals.tolist()
	    # allocate peak_mass to the center, the rest to the normalized neighbours
	    vals = others / total * (1.0 - peak_mass)
	    vals[center] = peak_mass
	    return vals.tolist()


	# Eleven scenarios, one per center index 0..10; each row comes from
	# make_gauss_values with its default settings. The ground truth is always "4".
	gauss = [
	    dict(
	        name=f"Gaussian: center at token {peak}",
	        values=make_gauss_values(peak),
	        ground_truth="4",
	        explanation="Gaussian-style: 0.6 mass at the highlighted token, 0.4 spread smoothly to its neighbors.",
	    )
	    for peak in range(11)
	]


	# (3) Bimodal: two spikes of 0.5 mass each, symmetrically offset from the ground truth (GT=4) and clamped to the valid token range
	def make_bimodal_values(offset, n=11, gt=4):
	    """Return a two-spike distribution over ``n`` tokens as a list of floats.

	    Half of the mass goes to index ``gt - offset`` and half to
	    ``gt + offset``; the left index is clamped at 0 and the right index at
	    ``n - 1``. When both spikes land on the same index (e.g.
	    ``offset == 0``) their mass is added together, so the result always
	    sums to 1.
	    """
	    # clamp to [0,n-1]
	    left = max(0, gt - offset)
	    right = min(n - 1, gt + offset)
	    vals = [0.0] * n
	    # Accumulate instead of assigning: with left == right a plain assignment
	    # would overwrite the first spike and leave only 0.5 total mass.
	    vals[left] += 0.5
	    vals[right] += 0.5
	    return vals


	# Eleven scenarios, one per offset 0..10; the name reports the two peak
	# positions after clamping to the token range 0..10. The ground truth is always "4".
	bimodal = [
	    dict(
	        name=f"Bimodal: peaks at tokens {max(0, 4 - shift)} & {min(10, 4 + shift)}",
	        values=make_bimodal_values(shift),
	        ground_truth="4",
	        explanation="Two-point (bimodal) distribution: equal 0.5 mass on each peak, which move ±offset from the ground truth.",
	    )
	    for shift in range(11)
	]