3outeille (HF Staff) committed
Commit 45381ed · verified · 1 Parent(s): 0a7ed44

Create create_tiny.py

Files changed (1)
  1. create_tiny.py +58 -0
create_tiny.py ADDED
@@ -0,0 +1,58 @@
+ import subprocess
+ import shlex
+ import torch
+ from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizerFast
+
+
+ mname_from = "meta-llama/Llama-2-7b-hf"
+ mname_tiny = "tiny-llama-2-debug"
+ vocab_keep_items = 3000
+
+ config = LlamaConfig.from_pretrained(mname_from)
+ config.update(dict(
+     hidden_size=16,
+     intermediate_size=64,
+     num_attention_heads=4,
+     num_hidden_layers=2,
+     max_position_embeddings=256,
+     num_key_value_heads=4,
+     vocab_size=vocab_keep_items,
+ ))
+ print("new config", config)
+
+ # create a tiny model with random weights (hidden_size must be divisible by num_attention_heads)
+ tiny_model = LlamaForCausalLM(config)
+ print(f"num of params {tiny_model.num_parameters()}")
+
+ # save the tiny model
+ tiny_model.save_pretrained(mname_tiny)
+
+ # shrink the tokenizer from 32k to 3k vocab
+ tokenizer_fast = LlamaTokenizerFast.from_pretrained(mname_from)
+ tmp_dir = f"/tmp/{mname_from}"
+ tokenizer_fast.save_pretrained(tmp_dir)
+ # truncate the vocab in tokenizer.json (the fast tokenizer loads from tokenizer.json)
+ # perl -0777 -pi -e 's|(2999).*|$1},"merges": []}}|msg' tokenizer.json # token ids are 0-indexed, so match vocab_keep_items-1!
+ closing_pat = '},"merges": []}}'
+ cmd = f"perl -0777 -pi -e 's|({vocab_keep_items-1}).*|$1{closing_pat}|msg' {tmp_dir}/tokenizer.json"
+ # print(f"Running:\n{cmd}")
+ result = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
+ # print(result)
+
+ # reload with the modified tokenizer
+ tiny_tokenizer = LlamaTokenizerFast.from_pretrained(tmp_dir)
+ tiny_tokenizer.save_pretrained(mname_tiny)
+
+ # smoke-test the new model and tokenizer together
+ model_inputs = tiny_tokenizer("Making tiny model", return_tensors="pt")
+ gen_tokens = tiny_model.generate(**model_inputs, max_new_tokens=100)
+ print(tiny_tokenizer.batch_decode(gen_tokens, skip_special_tokens=True))
+ print("Random output should be expected, but no crashing")
+
+ print(f"Model+Tokenizer saved in {mname_tiny}")
+
+ # Push to Hugging Face Hub
+ tiny_model.push_to_hub(f"boom-project/{mname_tiny}")
+ tiny_tokenizer.push_to_hub(f"boom-project/{mname_tiny}")
+
+ print(f"Model and tokenizer pushed to boom-project/{mname_tiny}")