Spaces:

alibayram
/

usta-llm-demo

Running

App Files Files Community

alibayram committed 20 days ago

Commit

6563ff2

1 Parent(s): 67856b9

v2 implemented

Browse files

Files changed (17) hide show

.DS_Store +0 -0
app.py +29 -14
module_3_3.ipynb +366 -0
v1/u_model.pth +0 -0
{v1 → v2}/__init__.py +0 -0
{v1 → v2}/tokenizer.json +0 -0
v2/u_model_4000.pth +0 -0
{v1 → v2}/usta_causal_attention.py +0 -0
{v1 → v2}/usta_decoder_block.py +12 -6
{v1 → v2}/usta_embedding.py +10 -9
{v1 → v2}/usta_layer_norm.py +3 -4
{v1 → v2}/usta_mlp.py +5 -5
{v1 → v2}/usta_model.py +45 -8
{v1 → v2}/usta_multi_head_attention.py +4 -4
{v1 → v2}/usta_multi_head_attention_old.py +1 -2
{v1 → v2}/usta_self_attention.py +0 -0
{v1 → v2}/usta_tokenizer.py +16 -1

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

app.py CHANGED Viewed

@@ -3,14 +3,14 @@ import os
 import gradio as gr
 import torch
-from v1.usta_model import UstaModel
-from v1.usta_tokenizer import UstaTokenizer
 # Load the model and tokenizer
 def load_model(custom_model_path=None):
     try:
-        u_tokenizer = UstaTokenizer("v1/tokenizer.json")
         print("✅ Tokenizer loaded successfully! vocab size:", len(u_tokenizer.vocab))
         # Model parameters - adjust these to match your trained model
@@ -19,6 +19,7 @@ def load_model(custom_model_path=None):
         embedding_dim = 12
         num_heads = 4
         num_layers = 8
         # Load the model
         u_model = UstaModel(
@@ -26,7 +27,8 @@ def load_model(custom_model_path=None):
             embedding_dim=embedding_dim,
             num_heads=num_heads,
             context_length=context_length,
-            num_layers=num_layers
         )
         # Determine which model file to use
@@ -34,7 +36,7 @@ def load_model(custom_model_path=None):
             model_path = custom_model_path
             print(f"🎯 Using uploaded model: {model_path}")
         else:
-            model_path = "v1/u_model.pth"
             if not os.path.exists(model_path):
                 print("❌ Model file not found at", model_path)
@@ -58,8 +60,8 @@ def load_model(custom_model_path=None):
                     print(f"📦 Downloaded {len(response.content)} bytes")
-                    # Create v1 directory if it doesn't exist
-                    os.makedirs("v1", exist_ok=True)
                     # Save the model weights to the local file system
                     with open(model_path, "wb") as f:
@@ -195,7 +197,7 @@ def load_model_from_file(uploaded_file):
         model_status = error_msg
         return error_msg
-def chat_with_usta(message, history, max_tokens=20):
     """Simple chat function"""
     if model is None or tokenizer is None:
         return history + [["Error", "UstaModel is not available. Please try again later."]]
@@ -211,7 +213,13 @@ def chat_with_usta(message, history, max_tokens=20):
         # Generate response
         with torch.no_grad():
             actual_max_tokens = min(max_tokens, 32 - len(tokens))
-            generated_tokens = model.generate(tokens, actual_max_tokens)
         # Decode the generated tokens
         response = tokenizer.decode(generated_tokens)
@@ -249,7 +257,14 @@ with gr.Blocks(title="🤖 Usta Model Chat") as demo:
         clear_btn = gr.Button("Clear")
     # Generation settings
-    max_tokens = gr.Slider(minimum=1, maximum=30, value=20, step=1, label="Max tokens")
     # Model loading (simplified)
     gr.Markdown("## 🔧 Load Custom Model (Optional)")
@@ -268,20 +283,20 @@ with gr.Blocks(title="🤖 Usta Model Chat") as demo:
     status = gr.Textbox(label="Status", value=model_status, interactive=False)
     # Event handlers
-    def send_message(message, history, max_tok):
         if not message.strip():
             return history, ""
-        return chat_with_usta(message, history, max_tok), ""
     send_btn.click(
         send_message,
-        inputs=[msg, chatbot, max_tokens],
         outputs=[chatbot, msg]
     )
     msg.submit(
         send_message,
-        inputs=[msg, chatbot, max_tokens],
         outputs=[chatbot, msg]
     )

 import gradio as gr
 import torch
+from v2.usta_model import UstaModel
+from v2.usta_tokenizer import UstaTokenizer
 # Load the model and tokenizer
 def load_model(custom_model_path=None):
     try:
+        u_tokenizer = UstaTokenizer("v2/tokenizer.json")
         print("✅ Tokenizer loaded successfully! vocab size:", len(u_tokenizer.vocab))
         # Model parameters - adjust these to match your trained model
         embedding_dim = 12
         num_heads = 4
         num_layers = 8
+        device = "cpu"  # Use CPU for compatibility
         # Load the model
         u_model = UstaModel(
             embedding_dim=embedding_dim,
             num_heads=num_heads,
             context_length=context_length,
+            num_layers=num_layers,
+            device=device
         )
         # Determine which model file to use
             model_path = custom_model_path
             print(f"🎯 Using uploaded model: {model_path}")
         else:
+            model_path = "v2/u_model_4000.pth"
             if not os.path.exists(model_path):
                 print("❌ Model file not found at", model_path)
                     print(f"📦 Downloaded {len(response.content)} bytes")
+                    # Create v2 directory if it doesn't exist
+                    os.makedirs("v2", exist_ok=True)
                     # Save the model weights to the local file system
                     with open(model_path, "wb") as f:
         model_status = error_msg
         return error_msg
+def chat_with_usta(message, history, max_tokens=20, temperature=1.0, top_k=64, top_p=1.0):
     """Simple chat function"""
     if model is None or tokenizer is None:
         return history + [["Error", "UstaModel is not available. Please try again later."]]
         # Generate response
         with torch.no_grad():
             actual_max_tokens = min(max_tokens, 32 - len(tokens))
+            generated_tokens = model.generate(
+                tokens,
+                max_new_tokens=actual_max_tokens,
+                temperature=temperature,
+                top_k=top_k,
+                top_p=top_p
+            )
         # Decode the generated tokens
         response = tokenizer.decode(generated_tokens)
         clear_btn = gr.Button("Clear")
     # Generation settings
+    gr.Markdown("## ⚙️ Generation Settings")
+    with gr.Row():
+        max_tokens = gr.Slider(minimum=1, maximum=30, value=20, step=1, label="Max tokens")
+        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature")
+    with gr.Row():
+        top_k = gr.Slider(minimum=1, maximum=64, value=40, step=1, label="Top-k")
+        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Top-p (nucleus sampling)")
     # Model loading (simplified)
     gr.Markdown("## 🔧 Load Custom Model (Optional)")
     status = gr.Textbox(label="Status", value=model_status, interactive=False)
     # Event handlers
+    def send_message(message, history, max_tok, temp, k, p):
         if not message.strip():
             return history, ""
+        return chat_with_usta(message, history, max_tok, temp, k, p), ""
     send_btn.click(
         send_message,
+        inputs=[msg, chatbot, max_tokens, temperature, top_k, top_p],
         outputs=[chatbot, msg]
     )
     msg.submit(
         send_message,
+        inputs=[msg, chatbot, max_tokens, temperature, top_k, top_p],
         outputs=[chatbot, msg]
     )

module_3_3.ipynb ADDED Viewed

	@@ -0,0 +1,366 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using device: mps\n",
+      "tensor([ 0, 61,  1, 61,  2, 61,  0, 61,  3], device='mps:0')\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([4, 32])"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "from usta_model import UstaModel\n",
+    "from usta_tokenizer import UstaTokenizer\n",
+    "\n",
+    "device = \"cpu\"\n",
+    "\n",
+    "if torch.cuda.is_available():\n",
+    "  device = \"cuda\"\n",
+    "elif torch.backends.mps.is_available():\n",
+    "  device = \"mps\"\n",
+    "  \n",
+    "\n",
+    "print(f\"Using device: {device}\")\n",
+    "\n",
+    "u_tokenizer = UstaTokenizer(\"tokenizer.json\")\n",
+    "\n",
+    "prompts = [\n",
+    "  \"the capital of the united\",\n",
+    "  \"madrid is in\",\n",
+    "  \"the capital of france is\",\n",
+    "  \"the capital of germany is\"\n",
+    "]\n",
+    "\n",
+    "tokens = u_tokenizer.encode(prompts[0])\n",
+    "tokens = tokens.to(device)\n",
+    "print(tokens)\n",
+    "batch_tokens = u_tokenizer.encode_batch(prompts, 32)\n",
+    "batch_tokens = batch_tokens.to(device)\n",
+    "batch_tokens.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<All keys matched successfully>"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.manual_seed(1)\n",
+    "context_length = 32\n",
+    "\n",
+    "u_model = UstaModel(\n",
+    "  vocab_size=len(u_tokenizer.vocab),\n",
+    "  embedding_dim=12,\n",
+    "  num_heads=4,\n",
+    "  context_length=context_length,\n",
+    "  num_layers=8,\n",
+    "  device=device\n",
+    ")\n",
+    "\n",
+    "# load model\n",
+    "u_model.load_state_dict(torch.load(\"../u_model_4000.pth\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([4, 32, 64])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "out = u_model(batch_tokens)\n",
+    "out.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# temperature\n",
+    "# top_k \n",
+    "# top_p\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "top_k = 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([17.6884, 14.0799,  9.0104,  8.4548,  7.3207,  7.2960,  6.8096,  6.6073,\n",
+       "          6.6009,  6.3761]),\n",
+       " [61, 60, 35, 58, 9, 38, 59, 4, 18, 49])"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sorted_outs = sorted(out[-1][-1].tolist(), reverse=True)\n",
+    "sorted_indexes = []\n",
+    "for so in sorted_outs[:top_k]:\n",
+    "  so_index = out[-1][-1].tolist().index(so)\n",
+    "  sorted_indexes.append(so_index)\n",
+    "sorted_outs = torch.tensor(sorted_outs[:top_k])\n",
+    "sorted_outs, sorted_indexes\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([17.6884, 14.0799,  9.0104,  8.4548,  7.3207,  7.2960,  6.8096,  6.6073,\n",
+       "          6.6009,  6.3761], device='mps:0', grad_fn=<TopkBackward0>),\n",
+       " tensor([61, 60, 35, 58,  9, 38, 59,  4, 18, 49], device='mps:0'))"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "values, indexes = torch.topk(out[-1][-1], k=10)\n",
+    "values, indexes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/z7/wrd0w0hn7pvb9g97kmdn17640000gn/T/ipykernel_91075/2885985782.py:2: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  adjusted_outs = torch.tensor(sorted_outs) / temperature\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "tensor([1.6830, 1.3397, 0.8573, 0.8045, 0.6965, 0.6942, 0.6479, 0.6287, 0.6281,\n",
+       "        0.6067])"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "temperature = 10.51\n",
+    "adjusted_outs = torch.tensor(sorted_outs) / temperature\n",
+    "adjusted_outs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([0.2128, 0.1509, 0.0932, 0.0884, 0.0793, 0.0791, 0.0756, 0.0741, 0.0741,\n",
+       "        0.0725])"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "probs = torch.softmax(adjusted_outs, dim=-1)\n",
+    "probs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "top_p = 0.7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor(0.5453)"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[0.2128, 0.36, 0.37, 0.38, 0.70, 0.71]\n",
+    "torch.sum(torch.tensor([0.2128, 0.1509, 0.0932, 0.0884]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{0: 212, 4: 82, 5: 87, 9: 83, 2: 74, 6: 73, 1: 154, 3: 91, 8: 80, 7: 64}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample_count = {}\n",
+    "for _ in range(1000):\n",
+    "  sample = torch.multinomial(probs, 1)\n",
+    "  sample_count[sample.item()] = sample_count.get(sample.item(), 0) + 1\n",
+    "sample_count"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'the capital of the united.': 3,\n",
+       " 'the capital of the united the ': 22,\n",
+       " 'the capital of the united identity,': 1,\n",
+       " 'the capital of the united capitals': 5,\n",
+       " 'the capital of the united country ': 8,\n",
+       " 'the capital of the united europe ': 26,\n",
+       " 'the capital of the united is ': 7,\n",
+       " 'the capital of the united place ': 4,\n",
+       " 'the capital of the united europe,': 3,\n",
+       " 'the capital of the united united ': 6,\n",
+       " 'the capital of the united for ': 1,\n",
+       " 'the capital of the united spain,': 2,\n",
+       " 'the capital of the united europe.': 1,\n",
+       " 'the capital of the united italy,': 4,\n",
+       " 'the capital of the united art ': 1,\n",
+       " 'the capital of the united of ': 1,\n",
+       " 'the capital of the united  united': 1,\n",
+       " 'the capital of the united capitaled': 1,\n",
+       " 'the capital of the united, country': 1,\n",
+       " 'the capital of the united place.': 1,\n",
+       " 'the capital of the united, europe': 1}"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "outs = {}\n",
+    "for _ in range(100):\n",
+    "  out = u_model.generate(tokens, max_new_tokens = 3, temperature = 1.7, top_k = 10, top_p = 0.7)\n",
+    "  decoded = u_tokenizer.decode(out)\n",
+    "  outs[decoded] = outs.get(decoded, 0) + 1\n",
+    "outs"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

v1/u_model.pth DELETED Viewed

Binary file (97.2 kB)

{v1 → v2}/__init__.py RENAMED Viewed

File without changes

{v1 → v2}/tokenizer.json RENAMED Viewed

File without changes

v2/u_model_4000.pth ADDED Viewed

Binary file (96.1 kB). View file

{v1 → v2}/usta_causal_attention.py RENAMED Viewed

File without changes

{v1 → v2}/usta_decoder_block.py RENAMED Viewed

@@ -6,17 +6,23 @@ from .usta_multi_head_attention import UstaMultiHeadAttention
 class UstaDecoderBlock(nn.Module):
-  def __init__(self, embedding_dim, num_heads, context_length):
     super().__init__()
-    self.self_attention = UstaMultiHeadAttention(embedding_dim, embedding_dim, context_length, num_heads, dropout_rate=0.5)
-    self.norm1 = UstaLayerNorm(embedding_dim)
-    self.mlp = UstaMLP(embedding_dim, embedding_dim)
-    self.norm2 = UstaLayerNorm(embedding_dim)
   def forward(self, x):
     res = self.norm1(x)
     x = self.self_attention(x)
     x = self.norm1(x)

 class UstaDecoderBlock(nn.Module):
+  def __init__(self, embedding_dim, num_heads, context_length, device):
     super().__init__()
+    self.self_attention = UstaMultiHeadAttention(
+      embedding_dim,
+      embedding_dim,
+      context_length,
+      num_heads,
+      dropout_rate=0.5,
+      device=device
+    )
+    self.norm1 = UstaLayerNorm(embedding_dim, device=device)
+    self.mlp = UstaMLP(embedding_dim, embedding_dim, device=device)
+    self.norm2 = UstaLayerNorm(embedding_dim, device=device)
   def forward(self, x):
     res = self.norm1(x)
     x = self.self_attention(x)
     x = self.norm1(x)

{v1 → v2}/usta_embedding.py RENAMED Viewed

@@ -3,7 +3,7 @@ import torch.nn as nn
 def get_rotary_position_encoding(input: torch.Tensor, base=10000, device="cpu"):
-  context_length, dimension = input.shape
   assert dimension % 2 == 0, "dimension must be even"
@@ -20,30 +20,31 @@ def get_rotary_position_encoding(input: torch.Tensor, base=10000, device="cpu"):
   sin_angles = torch.sin(angles)
   cos_angles = torch.cos(angles)
-  input_even = input[:, :dimension // 2] # [0, 2, 4, ..]
-  input_odd = input[:, dimension // 2:] # [1, 3, 5, ..]
   input_even_rotated = input_even * cos_angles - input_odd * sin_angles
   input_odd_rotated = input_even * sin_angles + input_odd * cos_angles
-  input_rotated = torch.empty_like(input)
-  input_rotated[:, :dimension // 2] = input_even_rotated
-  input_rotated[:, dimension // 2:] = input_odd_rotated
   return input_rotated
 class UstaEmbedding(nn.Module):
-  def __init__(self, vocab_size, embedding_dim, context_length):
     super().__init__()
     # position embedding but not being used in the forward pass
     # it is just for educational purposes
     # self.pos_embedding = nn.Embedding(context_length, embedding_dim)
     # self.get_pos = get_rotary_position_encoding
-    self.embedding = nn.Embedding(vocab_size, embedding_dim)
     self.get_pos = get_rotary_position_encoding
   def forward(self, x):
     x = self.embedding(x)
-    x = self.get_pos(x)
     return x

 def get_rotary_position_encoding(input: torch.Tensor, base=10000, device="cpu"):
+  batch_size, context_length, dimension = input.shape
   assert dimension % 2 == 0, "dimension must be even"
   sin_angles = torch.sin(angles)
   cos_angles = torch.cos(angles)
+  input_even = input[:, :, :dimension // 2] # [0, 2, 4, ..]
+  input_odd = input[:, :, dimension // 2:] # [1, 3, 5, ..]
   input_even_rotated = input_even * cos_angles - input_odd * sin_angles
   input_odd_rotated = input_even * sin_angles + input_odd * cos_angles
+  input_rotated = torch.empty_like(input, device=device)
+  input_rotated[:, :, :dimension // 2] = input_even_rotated
+  input_rotated[:, :, dimension // 2:] = input_odd_rotated
   return input_rotated
 class UstaEmbedding(nn.Module):
+  def __init__(self, vocab_size, embedding_dim, context_length, device):
     super().__init__()
     # position embedding but not being used in the forward pass
     # it is just for educational purposes
     # self.pos_embedding = nn.Embedding(context_length, embedding_dim)
     # self.get_pos = get_rotary_position_encoding
+    self.embedding = nn.Embedding(vocab_size, embedding_dim, device=device)
     self.get_pos = get_rotary_position_encoding
+    self.device = device
   def forward(self, x):
     x = self.embedding(x)
+    x = self.get_pos(x, device=self.device)
     return x

{v1 → v2}/usta_layer_norm.py RENAMED Viewed

@@ -3,13 +3,12 @@ import torch.nn as nn
 class UstaLayerNorm(nn.Module):
-  def __init__(self, embedding_dim, eps=1e-5):
     super().__init__()
     self.eps = eps
-    self.weight = nn.Parameter(torch.ones(embedding_dim))
   def forward(self, x):
     mean = x.mean(dim=-1, keepdim=True)
     variance = x.var(dim=-1, keepdim=True, unbiased=False)

 class UstaLayerNorm(nn.Module):
+  def __init__(self, embedding_dim, eps=1e-5, device="cpu"):
     super().__init__()
     self.eps = eps
+    self.weight = nn.Parameter(torch.ones(embedding_dim, device=device))
+    self.device = device
   def forward(self, x):
     mean = x.mean(dim=-1, keepdim=True)
     variance = x.var(dim=-1, keepdim=True, unbiased=False)

{v1 → v2}/usta_mlp.py RENAMED Viewed

@@ -14,13 +14,13 @@ class GELU(nn.Module):
     )
 class UstaMLP(nn.Module):
-  def __init__(self, embedding_dim, hidden_dim):
     super().__init__()
-    self.gate_proj = nn.Linear(embedding_dim, hidden_dim)
-    self.up_proj = nn.Linear(embedding_dim, hidden_dim)
-    self.down_proj = nn.Linear(hidden_dim, embedding_dim)
-    self.gelu = GELU()
   def forward(self, x):
     """ gate = self.gate_proj(x)

     )
 class UstaMLP(nn.Module):
+  def __init__(self, embedding_dim, hidden_dim, device="cpu"):
     super().__init__()
+    self.gate_proj = nn.Linear(embedding_dim, hidden_dim, device=device)
+    self.up_proj = nn.Linear(embedding_dim, hidden_dim, device=device)
+    self.down_proj = nn.Linear(hidden_dim, embedding_dim, device=device)
+    self.gelu = GELU().to(device)
   def forward(self, x):
     """ gate = self.gate_proj(x)

{v1 → v2}/usta_model.py RENAMED Viewed

@@ -6,15 +6,16 @@ from .usta_embedding import UstaEmbedding
 class UstaModel(nn.Module):
-  def __init__(self, vocab_size, embedding_dim, num_heads, context_length, num_layers):
     super().__init__()
-    self.embedding = UstaEmbedding(vocab_size, embedding_dim, context_length)
     self.layers = nn.Sequential(
-      *[UstaDecoderBlock(embedding_dim, num_heads, context_length) for _ in range(num_layers)]
     )
-    self.lm_head = nn.Linear(embedding_dim, vocab_size)
   def forward(self, x: torch.Tensor):
     x = self.embedding(x) # dictionary meaning of the tokens (words)
@@ -32,13 +33,49 @@ class UstaModel(nn.Module):
   max_prob, max_index, probs
   """
-  def generate(self, x: torch.Tensor, max_new_tokens: int): # top_k, top_p, temperature
-    tokens = x.detach().cpu().numpy().tolist()
     for _ in range(max_new_tokens):
       out = self.forward(x)
-      probs = torch.softmax(out[-1], dim=-1)
-      _, max_index = torch.max(probs, dim=-1)
       tokens.append(max_index.item())
       if max_index == 59 or len(tokens) > 32: # <eos> and max context length
         break

 class UstaModel(nn.Module):
+  def __init__(self, vocab_size, embedding_dim, num_heads, context_length, num_layers, device):
     super().__init__()
+    self.embedding = UstaEmbedding(vocab_size, embedding_dim, context_length, device)
     self.layers = nn.Sequential(
+      *[UstaDecoderBlock(embedding_dim, num_heads, context_length, device) for _ in range(num_layers)]
     )
+    self.lm_head = nn.Linear(embedding_dim, vocab_size, device=device)
+    self.device = device
   def forward(self, x: torch.Tensor):
     x = self.embedding(x) # dictionary meaning of the tokens (words)
   max_prob, max_index, probs
   """
+  def top_p_filtering(self, logits, top_p):
+    """Mask logits outside the nucleus (top-p) set with -inf.
+
+    Tokens are ranked by logit and kept, most likely first, until their
+    cumulative softmax probability exceeds ``top_p``. The token that crosses
+    the threshold is kept as well, so the highest-ranked token always
+    survives.
+
+    Args:
+      logits: tensor whose last dimension indexes the vocabulary; it may be
+        1-D or carry leading batch dimensions.
+      top_p: cumulative probability threshold.
+
+    Returns:
+      A new tensor shaped like ``logits``, in the original token order, with
+      the discarded entries set to -inf. ``logits`` itself is not modified.
+    """
+    sorted_logits, sorted_indices = torch.sort(logits, dim=-1, descending=True)
+    cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
+    # Shift the "over threshold" mask one slot to the right so the token that
+    # crosses top_p is retained and rank 0 is never removed.
+    over_threshold = cumulative_probs > top_p
+    remove = torch.zeros_like(over_threshold)
+    remove[..., 1:] = over_threshold[..., :-1]
+    sorted_logits = sorted_logits.masked_fill(remove, -float('inf'))
+    # Undo the sort along the vocabulary axis (the last one) so batched input
+    # works too, not only 1-D logits.
+    return torch.empty_like(sorted_logits).scatter_(-1, sorted_indices, sorted_logits)
+  def generate(self,
+               x: torch.Tensor,
+               max_new_tokens: int = 3,
+               temperature: float = 1.0,
+               top_k: int = 64,
+               top_p: float = 1.0
+              ):
+    """Autoregressively sample up to ``max_new_tokens`` tokens after ``x``.
+
+    Each step takes the logits of the last position, optionally restricts
+    them to the ``top_k`` best tokens, optionally applies nucleus (``top_p``)
+    filtering, optionally divides by ``temperature``, and then draws one
+    token from the resulting distribution.
+
+    Args:
+      x: prompt token ids (assumed 1-D, since a batch axis is added before
+        the forward pass and removed afterwards -- TODO confirm).
+      max_new_tokens: upper bound on the number of sampling steps.
+      temperature: logits are divided by this when it is positive and not 1.
+      top_k: keep only the k largest logits; values <= 0 disable the filter.
+      top_p: nucleus threshold; applied only when 0 < top_p < 1.
+
+    NOTE(review): this hunk stops at ``break``; the rest of the loop body and
+    the return statement are outside the visible diff.
+    """
+    tokens = x.tolist()
     for _ in range(max_new_tokens):
+      x = x.unsqueeze(0).to(self.device)
       out = self.forward(x)
+      out = out.squeeze(0)
+      logits = out[-1]
+      if top_k > 0:
+        # Clamp k: torch.topk raises when k exceeds the vocabulary size.
+        values, indexes = torch.topk(logits, k=min(top_k, logits.size(-1)))
+        logits = torch.full_like(logits, -float('inf'))
+        logits.scatter_(0, indexes, values)
+      if top_p > 0 and top_p < 1:
+        logits = self.top_p_filtering(logits, top_p)
+      if temperature != 1.0 and temperature > 0:
+        logits = logits / temperature
+      # Sample from the filtered, temperature-scaled logits. Sampling from the
+      # raw top-k values instead would silently discard top_p and temperature
+      # and would fail with a NameError when top_k is disabled.
+      probs = torch.softmax(logits, dim=-1)
+      # The distribution spans the whole vocabulary (filtered tokens have
+      # probability 0), so the drawn index is already the token id.
+      max_index = torch.multinomial(probs, 1)
       tokens.append(max_index.item())
       if max_index == 59 or len(tokens) > 32: # <eos> and max context length
         break

{v1 → v2}/usta_multi_head_attention.py RENAMED Viewed

@@ -3,15 +3,15 @@ import torch.nn as nn
 class UstaMultiHeadAttention(nn.Module):
-  def __init__(self, embedding_dim, output_dim, context_length, num_heads, dropout_rate = 0):
     super().__init__()
     self.context_length = context_length
-    self.multi_head_attention = nn.MultiheadAttention(embedding_dim, num_heads, dropout=dropout_rate)
-    self.projection = nn.Linear(embedding_dim, output_dim)
-    self.register_buffer("mask", torch.triu(torch.ones(context_length, context_length), diagonal=1).bool())
   def forward(self, x):
     number_of_tokens = x.shape[0]

 class UstaMultiHeadAttention(nn.Module):
+  def __init__(self, embedding_dim, output_dim, context_length, num_heads, dropout_rate = 0, device="cpu"):
     super().__init__()
     self.context_length = context_length
+    self.multi_head_attention = nn.MultiheadAttention(embedding_dim, num_heads, dropout=dropout_rate, device=device)
+    self.projection = nn.Linear(embedding_dim, output_dim, device=device)
+    self.register_buffer("mask", torch.triu(torch.ones(context_length, context_length), diagonal=1).bool().to(device))
   def forward(self, x):
     number_of_tokens = x.shape[0]

{v1 → v2}/usta_multi_head_attention_old.py RENAMED Viewed

@@ -22,5 +22,4 @@ class UstaMultiHeadAttention(nn.Module):
     attention_out = torch.cat(attention_outs, dim=1)
-    return self.projection(attention_out)


22
23	attention_out = torch.cat(attention_outs, dim=1)
24
25	+ return self.projection(attention_out)

{v1 → v2}/usta_self_attention.py RENAMED Viewed

File without changes

{v1 → v2}/usta_tokenizer.py RENAMED Viewed

@@ -9,6 +9,19 @@ class UstaTokenizer:
       self.vocab = json.load(f)
       self.reverse_vocab = {v: k for k, v in self.vocab.items()}
   def encode(self, text):
     tokens = []
@@ -31,7 +44,9 @@ class UstaTokenizer:
           i += 1
       tokens.append(self.vocab[" "])
-    tokens.pop()
     return torch.tensor(tokens)
   def tokenize(self, text):

       self.vocab = json.load(f)
       self.reverse_vocab = {v: k for k, v in self.vocab.items()}
+  def encode_batch(self, texts, context_length):
+    """Encode several texts into one tensor of fixed-width rows.
+
+    Every text goes through ``self.encode``. Rows longer than
+    ``context_length`` are cut to their first ``context_length`` ids; all
+    other rows are right-padded with the ``"<pad>"`` id from ``self.vocab``.
+
+    Returns:
+      A tensor built from the list of equally long id rows.
+    """
+    rows = []
+    for sentence in texts:
+      ids = self.encode(sentence).tolist()
+      missing = context_length - len(ids)
+      if missing < 0:
+        row = ids[:context_length]
+      else:
+        row = ids + missing * [self.vocab["<pad>"]]
+      rows.append(row)
+    return torch.tensor(rows)
   def encode(self, text):
     tokens = []
           i += 1
       tokens.append(self.vocab[" "])
+    # check if text is not ends with a space
+    if not text.endswith(" "):
+      tokens.pop()
     return torch.tensor(tokens)
   def tokenize(self, text):