Spaces:

piyushgrover
/

SmoLLM-135M

Sleeping

App Files Files Community

piyushgrover committed on Jan 24

Commit

e164375

verified ·

1 Parent(s): 189668a

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -18

app.py CHANGED Viewed

@@ -1,36 +1,50 @@
 import gradio as gr
 import torch
-from tiktoken import get_encoding
-from model import GPT, GPTConfig  # Replace with your actual model file/module
-# Load the GPT-2 tokenizer
-tokenizer = get_encoding("gpt2")
 # Load your custom model (adjust as necessary for your model's implementation)
 model_path = "model.pth"  # Replace with the path to your model weights
-model = GPT(GPTConfig())  # Initialize your custom model
-model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
 model.eval()  # Set the model to evaluation mode
 # Function to tokenize input and generate text
 def generate_text(prompt, max_length=50):
-    # Tokenize the input
-    input_ids = tokenizer.encode(prompt)
-    input_tensor = torch.tensor([input_ids])  # Add batch dimension
-    # Generate text using the model
-    with torch.no_grad():
-        output_ids = model.generate(input_tensor, max_length=max_length)  # Adjust if your model uses another method
-    # Decode the output back to text
-    generated_text = tokenizer.decode(output_ids[0].tolist())
-    return generated_text
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Custom Transformer Text Generation")
     gr.Markdown("Provide an input text prompt, and the model will generate text based on it.")
     with gr.Row():

 import gradio as gr
 import torch
+from transformers import AutoTokenizer
+from model import SmollM
+import yaml
+device = "cuda" if torch.cuda.is_available() else "cpu"
+with open("config.yaml", "r") as f:
+    config = yaml.safe_load(f)
+## Speed up with malmul
+torch.set_float32_matmul_precision('high')
+# Load model and tokenizer
+model = SmollM(config['model']['model_config'])
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(config['tokenizer']['tokenizer_name_or_path'])
 # Load your custom model (adjust as necessary for your model's implementation)
 model_path = "model.pth"  # Replace with the path to your model weights
+checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+model.load_state_dict(checkpoint['model_state_dict'])
 model.eval()  # Set the model to evaluation mode
+def generate_tokens(model, tokenizer, prompt, max_length=50, device="cuda"):
+    """Generates output tokens based on a given prompt."""
+    model.eval()
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+    with torch.no_grad():
+        outputs = input_ids
+        for _ in range(max_length):
+            logits = model(outputs[:, -1:])
+            next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
+            outputs = torch.cat([outputs, next_token], dim=1)
+            if next_token.item() == tokenizer.eos_token_id:
+                break
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 # Function to tokenize input and generate text
 def generate_text(prompt, max_length=50):
+    return generate_tokens(model, tokenizer, prompt, max_length, device)
 # Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# SmoLLM-135M Text Generation Demo")
     gr.Markdown("Provide an input text prompt, and the model will generate text based on it.")
     with gr.Row():