In [1]:
import torch

from usta_model import UstaModel
from usta_tokenizer import UstaTokenizer

device = "cpu"

if torch.cuda.is_available():
 device = "cuda"
elif torch.backends.mps.is_available():
 device = "mps"
 

print(f"Using device: {device}")

u_tokenizer = UstaTokenizer("tokenizer.json")

prompts = [
 "the capital of the united",
 "madrid is in",
 "the capital of france is",
 "the capital of germany is"
]

tokens = u_tokenizer.encode(prompts[0])
tokens = tokens.to(device)
print(tokens)
batch_tokens = u_tokenizer.encode_batch(prompts, 32)
batch_tokens = batch_tokens.to(device)
batch_tokens.shape

Using device: mps
tensor([ 0, 61, 1, 61, 2, 61, 0, 61, 3], device='mps:0')


torch.Size([4, 32])

In [2]:
torch.manual_seed(1)
context_length = 32

u_model = UstaModel(
 vocab_size=len(u_tokenizer.vocab),
 embedding_dim=12,
 num_heads=4,
 context_length=context_length,
 num_layers=8,
 device=device
)

# load model
u_model.load_state_dict(torch.load("../u_model_4000.pth"))



In [3]:
out = u_model(batch_tokens)
out.shape

torch.Size([4, 32, 64])

In [4]:
# temperature
# top_k 
# top_p


In [5]:
top_k = 10

In [6]:
sorted_outs = sorted(out[-1][-1].tolist(), reverse=True)
sorted_indexes = []
for so in sorted_outs[:top_k]:
 so_index = out[-1][-1].tolist().index(so)
 sorted_indexes.append(so_index)
sorted_outs = torch.tensor(sorted_outs[:top_k])
sorted_outs, sorted_indexes


(tensor([17.6884, 14.0799, 9.0104, 8.4548, 7.3207, 7.2960, 6.8096, 6.6073,
 6.6009, 6.3761]),
 [61, 60, 35, 58, 9, 38, 59, 4, 18, 49])

In [7]:
values, indexes = torch.topk(out[-1][-1], k=10)
values, indexes

(tensor([17.6884, 14.0799, 9.0104, 8.4548, 7.3207, 7.2960, 6.8096, 6.6073,
 6.6009, 6.3761], device='mps:0', grad_fn=),
 tensor([61, 60, 35, 58, 9, 38, 59, 4, 18, 49], device='mps:0'))

In [8]:
temperature = 10.51
adjusted_outs = torch.tensor(sorted_outs) / temperature
adjusted_outs

 adjusted_outs = torch.tensor(sorted_outs) / temperature


tensor([1.6830, 1.3397, 0.8573, 0.8045, 0.6965, 0.6942, 0.6479, 0.6287, 0.6281,
 0.6067])

In [9]:
probs = torch.softmax(adjusted_outs, dim=-1)
probs

tensor([0.2128, 0.1509, 0.0932, 0.0884, 0.0793, 0.0791, 0.0756, 0.0741, 0.0741,
 0.0725])

In [10]:
top_p = 0.7

In [11]:
[0.2128, 0.36, 0.37, 0.38, 0.70, 0.71]
torch.sum(torch.tensor([0.2128, 0.1509, 0.0932, 0.0884]))

tensor(0.5453)

In [12]:
sample_count = {}
for _ in range(1000):
 sample = torch.multinomial(probs, 1)
 sample_count[sample.item()] = sample_count.get(sample.item(), 0) + 1
sample_count

{0: 212, 4: 82, 5: 87, 9: 83, 2: 74, 6: 73, 1: 154, 3: 91, 8: 80, 7: 64}

In [14]:
outs = {}
for _ in range(100):
 out = u_model.generate(tokens, max_new_tokens = 3, temperature = 1.7, top_k = 10, top_p = 0.7)
 decoded = u_tokenizer.decode(out)
 outs[decoded] = outs.get(decoded, 0) + 1
outs

{'the capital of the united.': 3,
 'the capital of the united the ': 22,
 'the capital of the united identity,': 1,
 'the capital of the united capitals': 5,
 'the capital of the united country ': 8,
 'the capital of the united europe ': 26,
 'the capital of the united is ': 7,
 'the capital of the united place ': 4,
 'the capital of the united europe,': 3,
 'the capital of the united united ': 6,
 'the capital of the united for ': 1,
 'the capital of the united spain,': 2,
 'the capital of the united europe.': 1,
 'the capital of the united italy,': 4,
 'the capital of the united art ': 1,
 'the capital of the united of ': 1,
 'the capital of the united united': 1,
 'the capital of the united capitaled': 1,
 'the capital of the united, country': 1,
 'the capital of the united place.': 1,
 'the capital of the united, europe': 1}