piyushgrover committed
Commit e164375 · verified · 1 Parent(s): 189668a

Update app.py

Files changed (1):
  app.py  +32 -18
app.py CHANGED
@@ -1,36 +1,50 @@
 import gradio as gr
 import torch
-from tiktoken import get_encoding
-from model import GPT, GPTConfig  # Replace with your actual model file/module
+from transformers import AutoTokenizer
+from model import SmollM
+import yaml
 
-# Load the GPT-2 tokenizer
-tokenizer = get_encoding("gpt2")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+with open("config.yaml", "r") as f:
+    config = yaml.safe_load(f)
+
+# Speed up float32 matmuls
+torch.set_float32_matmul_precision('high')
+
+# Load model and tokenizer
+model = SmollM(config['model']['model_config']).to(device)
+
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(config['tokenizer']['tokenizer_name_or_path'])
 
 # Load your custom model (adjust as necessary for your model's implementation)
 model_path = "model.pth"  # Replace with the path to your model weights
-model = GPT(GPTConfig())  # Initialize your custom model
-model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
+checkpoint = torch.load(model_path, map_location=torch.device("cpu"))
+model.load_state_dict(checkpoint['model_state_dict'])
 model.eval()  # Set the model to evaluation mode
 
+def generate_tokens(model, tokenizer, prompt, max_length=50, device="cuda"):
+    """Greedily generate a continuation of `prompt` and return the decoded text."""
+    model.eval()
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+    with torch.no_grad():
+        outputs = input_ids
+        for _ in range(max_length):
+            logits = model(outputs)  # feed the full sequence each step (no KV cache)
+            next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
+            outputs = torch.cat([outputs, next_token], dim=1)
+            if next_token.item() == tokenizer.eos_token_id:
+                break
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 # Function to tokenize input and generate text
 def generate_text(prompt, max_length=50):
-    # Tokenize the input
-    input_ids = tokenizer.encode(prompt)
-    input_tensor = torch.tensor([input_ids])  # Add batch dimension
-
-    # Generate text using the model
-    with torch.no_grad():
-        output_ids = model.generate(input_tensor, max_length=max_length)  # Adjust if your model uses another method
-
-    # Decode the output back to text
-    generated_text = tokenizer.decode(output_ids[0].tolist())
-    return generated_text
+    return generate_tokens(model, tokenizer, prompt, max_length, device)
 
 
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Custom Transformer Text Generation")
+    gr.Markdown("# SmoLLM-135M Text Generation Demo")
     gr.Markdown("Provide an input text prompt, and the model will generate text based on it.")
 
     with gr.Row():
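
Note: the updated app.py reads two keys from config.yaml, config['model']['model_config'] and config['tokenizer']['tokenizer_name_or_path']. That file is not part of this commit, so the sketch below only shows the minimal structure yaml.safe_load would have to return; every value in it is an illustrative placeholder, not the Space's actual configuration.

# Minimal shape of the dict app.py expects after parsing config.yaml.
# All values here are assumptions, not taken from this commit.
config = {
    "model": {
        "model_config": {},  # hyperparameters expected by SmollM(...) in model.py
    },
    "tokenizer": {
        "tokenizer_name_or_path": "HuggingFaceTB/SmolLM-135M",  # assumed tokenizer repo
    },
}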
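The loading code also implies that model.pth is not a bare state dict but a checkpoint dictionary with a 'model_state_dict' entry. A minimal save/load round trip matching that layout is sketched below, using a stand-in nn.Linear module because the real SmollM training script is not part of this commit.

import torch
import torch.nn as nn

# Stand-in for the real SmollM model; only the checkpoint layout matters here.
model = nn.Linear(8, 8)

# Save side: wrap the state dict in a dict so that checkpoint['model_state_dict']
# exists. Extra keys (optimizer state, step counter, ...) would be optional.
torch.save({"model_state_dict": model.state_dict()}, "model.pth")

# Load side mirrors the code in the diff above.
checkpoint = torch.load("model.pth", map_location=torch.device("cpu"))
model.load_state_dict(checkpoint["model_state_dict"])
model.eval()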
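For a quick check of the generation path outside the Gradio UI, the handler can be called directly once app.py's globals are loaded; the prompt and length below are arbitrary examples, not part of the commit.

# Hypothetical smoke test for the same function the Gradio demo calls.
print(generate_text("Once upon a time", max_length=30))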