xzuyn committed on
Commit
23a5098
·
verified ·
1 Parent(s): 93d9f1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -89
app.py CHANGED
@@ -3,48 +3,21 @@ import gradio as gr
3
 
4
 
5
  def tokenize(input_text):
6
- llama_tokens = len(
7
- llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
8
- )
9
- llama3_tokens = len(
10
- llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
11
- )
12
- mistral_tokens = len(
13
- mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"]
14
- )
15
- gpt2_tokens = len(
16
- gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
17
- )
18
- gpt_neox_tokens = len(
19
- gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"]
20
- )
21
- falcon_tokens = len(
22
- falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"]
23
- )
24
- phi2_tokens = len(
25
- phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
26
- )
27
- phi3_tokens = len(
28
- phi3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
29
- )
30
- t5_tokens = len(
31
- t5_tokenizer(input_text, add_special_tokens=True)["input_ids"]
32
- )
33
- gemma_tokens = len(
34
- gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"]
35
- )
36
- command_r_tokens = len(
37
- command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"]
38
- )
39
- qwen_tokens = len(
40
- qwen_tokenizer(input_text, add_special_tokens=True)["input_ids"]
41
- )
42
- codeqwen_tokens = len(
43
- codeqwen_tokenizer(input_text, add_special_tokens=True)["input_ids"]
44
- )
45
- rwkv_tokens = len(
46
- rwkv_tokenizer(input_text, add_special_tokens=True)["input_ids"]
47
- )
48
 
49
  results = {
50
  "LLaMa-1/LLaMa-2": llama_tokens,
@@ -60,7 +33,8 @@ def tokenize(input_text):
60
  "Command-R": command_r_tokens,
61
  "Qwen/Qwen1.5": qwen_tokens,
62
  "CodeQwen": codeqwen_tokens,
63
- "v5-RWKV": rwkv_tokens
 
64
  }
65
 
66
  # Sort the results in descending order based on token length
@@ -70,51 +44,21 @@ def tokenize(input_text):
70
 
71
 
72
  if __name__ == "__main__":
73
- llama_tokenizer = AutoTokenizer.from_pretrained(
74
- "TheBloke/Llama-2-7B-fp16"
75
- )
76
- llama3_tokenizer = AutoTokenizer.from_pretrained(
77
- "unsloth/llama-3-8b"
78
- )
79
- mistral_tokenizer = AutoTokenizer.from_pretrained(
80
- "mistral-community/Mistral-7B-v0.2"
81
- )
82
- gpt2_tokenizer = AutoTokenizer.from_pretrained(
83
- "gpt2"
84
- )
85
- gpt_neox_tokenizer = AutoTokenizer.from_pretrained(
86
- "EleutherAI/gpt-neox-20b"
87
- )
88
- falcon_tokenizer = AutoTokenizer.from_pretrained(
89
- "tiiuae/falcon-7b"
90
- )
91
- phi2_tokenizer = AutoTokenizer.from_pretrained(
92
- "microsoft/phi-2"
93
- )
94
- phi3_tokenizer = AutoTokenizer.from_pretrained(
95
- "microsoft/Phi-3-mini-4k-instruct"
96
- )
97
- t5_tokenizer = AutoTokenizer.from_pretrained(
98
- "google/flan-t5-xxl"
99
- )
100
- gemma_tokenizer = AutoTokenizer.from_pretrained(
101
- "alpindale/gemma-2b"
102
- )
103
- command_r_tokenizer = AutoTokenizer.from_pretrained(
104
- "CohereForAI/c4ai-command-r-plus"
105
- )
106
- qwen_tokenizer = AutoTokenizer.from_pretrained(
107
- "Qwen/Qwen1.5-7B"
108
- )
109
- codeqwen_tokenizer = AutoTokenizer.from_pretrained(
110
- "Qwen/CodeQwen1.5-7B"
111
- )
112
- rwkv_tokenizer = AutoTokenizer.from_pretrained(
113
- "RWKV/v5-EagleX-v2-7B-HF",
114
- trust_remote_code=True
115
- )
116
 
117
- iface = gr.Interface(
118
- fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=14), outputs="text"
119
- )
120
  iface.launch()
 
3
 
4
 
5
  def tokenize(input_text):
6
+ llama_tokens = len(llama_tokenizer(input_text, add_special_tokens=True)["input_ids"])
7
+ llama3_tokens = len(llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"])
8
+ mistral_tokens = len(mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"])
9
+ gpt2_tokens = len(gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
10
+ gpt_neox_tokens = len(gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"])
11
+ falcon_tokens = len(falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"])
12
+ phi2_tokens = len(phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
13
+ phi3_tokens = len(phi3_tokenizer(input_text, add_special_tokens=True)["input_ids"])
14
+ t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
15
+ gemma_tokens = len(gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"])
16
+ command_r_tokens = len(command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"])
17
+ qwen_tokens = len(qwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
18
+ codeqwen_tokens = len(codeqwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
19
+ rwkv4_tokens = len(rwkv4_tokenizer(input_text, add_special_tokens=True)["input_ids"])
20
+ rwkv5_tokens = len(rwkv5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  results = {
23
  "LLaMa-1/LLaMa-2": llama_tokens,
 
33
  "Command-R": command_r_tokens,
34
  "Qwen/Qwen1.5": qwen_tokens,
35
  "CodeQwen": codeqwen_tokens,
36
+ "RWKV-v4": rwkv4_tokens,
37
+ "RWKV-v5/RWKV-v6": rwkv5_tokens
38
  }
39
 
40
  # Sort the results in descending order based on token length
 
44
 
45
 
46
  if __name__ == "__main__":
47
+ llama_tokenizer = AutoTokenizer.from_pretrained("TheBloke/Llama-2-7B-fp16")
48
+ llama3_tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b")
49
+ mistral_tokenizer = AutoTokenizer.from_pretrained("mistral-community/Mistral-7B-v0.2")
50
+ gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
51
+ gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
52
+ falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
53
+ phi2_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
54
+ phi3_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
55
+ t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
56
+ gemma_tokenizer = AutoTokenizer.from_pretrained("alpindale/gemma-2b")
57
+ command_r_tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-plus")
58
+ qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-7B")
59
+ codeqwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B")
60
+ rwkv4_tokenizer = AutoTokenizer.from_pretrained("RWKV/rwkv-4-14b-pile", trust_remote_code=True)
61
+ rwkv5_tokenizer = AutoTokenizer.from_pretrained("RWKV/v5-EagleX-v2-7B-HF", trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=15), outputs="text")
 
 
64
  iface.launch()