dev7halo commited on
Commit
1a69151
·
verified ·
1 Parent(s): 8e9646f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -94
app.py CHANGED
@@ -1,95 +1,190 @@
1
- import gradio as gr
2
- from transformers import AutoTokenizer
3
- import torch
4
-
5
- def count_tokens(model_name, text):
6
- """ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•˜๋Š” ํ•จ์ˆ˜"""
7
- try:
8
- if not model_name or not text:
9
- return "๋ชจ๋ธ๋ช…๊ณผ ํ…์ŠคํŠธ๋ฅผ ๋ชจ๋‘ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
10
-
11
- # ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
12
- tokenizer = AutoTokenizer.from_pretrained(model_name)
13
-
14
- # ํ† ํฐํ™”
15
- tokens = tokenizer.encode(text)
16
- token_count = len(tokens)
17
-
18
- # ํ† ํฐ ๋””์ฝ”๋”ฉ (์„ ํƒ์‚ฌํ•ญ - ํ† ํฐ๋“ค์„ ๋ณด์—ฌ์ฃผ๊ธฐ ์œ„ํ•ด)
19
- decoded_tokens = [tokenizer.decode([token]) for token in tokens]
20
-
21
- result = f"ํ† ํฐ ์ˆ˜: {token_count}\n\n"
22
- result += f"ํ† ํฐ๋“ค: {decoded_tokens[:50]}" # ์ฒ˜์Œ 50๊ฐœ๋งŒ ํ‘œ์‹œ
23
- if len(decoded_tokens) > 50:
24
- result += f"\n... (์ด {len(decoded_tokens)}๊ฐœ ํ† ํฐ ์ค‘ 50๊ฐœ๋งŒ ํ‘œ์‹œ)"
25
-
26
- return result
27
-
28
- except Exception as e:
29
- return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}\n\n๋ชจ๋ธ๋ช…์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”. ์˜ˆ: 'klue/bert-base', 'beomi/KcELECTRA-base'"
30
-
31
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
32
- def create_interface():
33
- with gr.Blocks(title="ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ", theme=gr.themes.Soft()) as demo:
34
- gr.Markdown("# ๐Ÿ”ข ํ—ˆ๊น…ํŽ˜์ด์Šค ๋ชจ๋ธ ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ")
35
- gr.Markdown("ํ—ˆ๊น…ํŽ˜์ด์Šค์— ์˜ฌ๋ผ์˜จ ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์‚ฌ์šฉํ•ด ํ…์ŠคํŠธ์˜ ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•ฉ๋‹ˆ๋‹ค.")
36
-
37
- with gr.Row():
38
- with gr.Column():
39
- model_input = gr.Textbox(
40
- label="๋ชจ๋ธ๋ช…",
41
- placeholder="์˜ˆ: klue/bert-base, beomi/KcELECTRA-base, gpt2",
42
- value="klue/bert-base"
43
- )
44
-
45
- text_input = gr.Textbox(
46
- label="ํ…์ŠคํŠธ",
47
- placeholder="ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
48
- lines=5
49
- )
50
-
51
- calculate_btn = gr.Button("ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ", variant="primary")
52
-
53
- with gr.Column():
54
- output = gr.Textbox(
55
- label="๊ฒฐ๊ณผ",
56
- lines=10,
57
- show_copy_button=True
58
- )
59
-
60
- # ์˜ˆ์‹œ ๋ฒ„ํŠผ๋“ค
61
- gr.Markdown("### ์ž์ฃผ ์‚ฌ์šฉ๋˜๋Š” ๋ชจ๋ธ ์˜ˆ์‹œ:")
62
- with gr.Row():
63
- example_models = [
64
- "klue/bert-base",
65
- "beomi/KcELECTRA-base",
66
- "gpt2",
67
- "microsoft/DialoGPT-medium"
68
- ]
69
-
70
- for model in example_models:
71
- btn = gr.Button(model, size="sm")
72
- btn.click(
73
- lambda x=model: x,
74
- outputs=model_input
75
- )
76
-
77
- # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
78
- calculate_btn.click(
79
- count_tokens,
80
- inputs=[model_input, text_input],
81
- outputs=output
82
- )
83
-
84
- # ์—”ํ„ฐํ‚ค๋กœ๋„ ์‹คํ–‰ ๊ฐ€๋Šฅํ•˜๊ฒŒ
85
- text_input.submit(
86
- count_tokens,
87
- inputs=[model_input, text_input],
88
- outputs=output
89
- )
90
-
91
- return demo
92
-
93
- if __name__ == "__main__":
94
- demo = create_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  demo.launch()
 
1
+ import gradio as gr
2
+ import os
3
+
4
def count_tokens(model_name, text, hf_token=None):
    """Count the tokens in *text* using the tokenizer of a Hugging Face model.

    Args:
        model_name: Hub id of the model whose tokenizer to load
            (e.g. "klue/bert-base").
        text: Text to tokenize.
        hf_token: Optional Hugging Face access token for gated/private models.

    Returns:
        A human-readable result string (token count plus a preview of the
        first 50 decoded tokens), or a localized error/help message when
        validation or tokenizer loading fails.
    """
    try:
        if not model_name or not text:
            return "๋ชจ๋ธ๋ช…๊ณผ ํ…์ŠคํŠธ๋ฅผ ๋ชจ๋‘ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."

        # Import transformers lazily so the UI can start even when the
        # (heavy) library import is slow or unavailable at startup.
        from transformers import AutoTokenizer

        # Load the tokenizer; pass the access token only when one was given.
        tokenizer_kwargs = {"trust_remote_code": True}
        if hf_token and hf_token.strip():
            tokenizer_kwargs["token"] = hf_token.strip()

        tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)

        # Tokenize the input.
        tokens = tokenizer.encode(text)
        token_count = len(tokens)

        # Decode each token individually for display purposes.
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        try:
            decoded_tokens = [tokenizer.decode([token]) for token in tokens]
        except Exception:
            decoded_tokens = ["ํ† ํฐ ๋””์ฝ”๋”ฉ ์‹คํŒจ"]

        result = f"โœ… ํ† ํฐ ์ˆ˜: {token_count}\n\n"
        result += f"ํ† ํฐ๋“ค: {decoded_tokens[:50]}"  # show only the first 50 tokens
        if len(decoded_tokens) > 50:
            result += f"\n... (์ด {len(decoded_tokens)}๊ฐœ ํ† ํฐ ์ค‘ 50๊ฐœ๋งŒ ํ‘œ์‹œ)"

        return result

    except Exception as e:
        # Compute str(e) once; it is reused by the branch checks below.
        error_text = str(e)
        error_msg = f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {error_text}\n\n"

        if "gated repo" in error_text:
            # Gated model: point the user at the access-request flow.
            error_msg += "๐Ÿ” ์ด ๋ชจ๋ธ์€ ์ ‘๊ทผ ๊ถŒํ•œ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค:\n"
            error_msg += f"1. https://huggingface.co/{model_name} ์—์„œ ์ ‘๊ทผ ๊ถŒํ•œ์„ ์š”์ฒญํ•˜์„ธ์š”\n"
            error_msg += "2. ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์„ ์ž…๋ ฅํ•˜์„ธ์š”\n"
            error_msg += "3. ํ† ํฐ ์ƒ์„ฑ: https://huggingface.co/settings/tokens\n\n"
        elif "does not exist" in error_text or "not found" in error_text:
            # Unknown repo id: suggest known-good public examples.
            error_msg += "๐Ÿ“ ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค:\n"
            error_msg += "1. ๋ชจ๋ธ๋ช…์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
            error_msg += "2. ๊ณต๊ฐœ ๋ชจ๋ธ ์˜ˆ์‹œ: 'klue/bert-base', 'beomi/KcELECTRA-base', 'gpt2'\n\n"
        else:
            # Generic fallback advice (network, typo, missing token).
            error_msg += "๐Ÿ”ง ๊ฐ€๋Šฅํ•œ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•:\n"
            error_msg += "1. ๋ชจ๋ธ๋ช…์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
            error_msg += "2. ๋„คํŠธ์›Œํฌ ์—ฐ๊ฒฐ์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
            error_msg += "3. ํ•„์š”์‹œ ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”\n"

        return error_msg
56
+
57
def check_model_access(model_name, hf_token=None):
    """Check whether the tokenizer of *model_name* can be loaded.

    Args:
        model_name: Hub id of the model to probe.
        hf_token: Optional Hugging Face access token for gated/private models.

    Returns:
        A localized status message describing success or the failure reason.
    """
    try:
        if not model_name:
            return "๋ชจ๋ธ๋ช…์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."

        from transformers import AutoTokenizer

        tokenizer_kwargs = {"trust_remote_code": True}
        if hf_token and hf_token.strip():
            tokenizer_kwargs["token"] = hf_token.strip()

        # Loading the tokenizer IS the access probe; the returned object is
        # not needed, so do not keep an unused local (was `tokenizer = ...`).
        AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
        return f"โœ… {model_name} ๋ชจ๋ธ ์ ‘๊ทผ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค!"

    except Exception as e:
        # Compute str(e) once for the branch checks.
        error_text = str(e)
        if "gated repo" in error_text:
            return f"๐Ÿ” {model_name}์€ ์ ‘๊ทผ ๊ถŒํ•œ์ด ํ•„์š”ํ•œ ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค. ํ† ํฐ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
        elif "does not exist" in error_text:
            return f"โŒ {model_name} ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
        else:
            return f"โŒ ์˜ค๋ฅ˜: {error_text}"
79
+
80
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
81
def create_interface():
    """Build and return the Gradio Blocks UI for the token-counter app.

    Wires the input widgets to count_tokens / check_model_access and adds
    example-model shortcut buttons plus a token how-to guide.
    """
    with gr.Blocks(title="ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# ๐Ÿ”ข ํ—ˆ๊น…ํŽ˜์ด์Šค ๋ชจ๋ธ ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ")
        gr.Markdown("ํ—ˆ๊น…ํŽ˜์ด์Šค์— ์˜ฌ๋ผ์˜จ ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์‚ฌ์šฉํ•ด ํ…์ŠคํŠธ์˜ ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•ฉ๋‹ˆ๋‹ค.")

        with gr.Row():
            with gr.Column():
                # Left column: user inputs.
                model_input = gr.Textbox(
                    label="๋ชจ๋ธ๋ช…",
                    placeholder="์˜ˆ: klue/bert-base, beomi/KcELECTRA-base, gpt2",
                    value="klue/bert-base"
                )

                # Optional HF access token; masked like a password field.
                token_input = gr.Textbox(
                    label="ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ (์„ ํƒ์‚ฌํ•ญ)",
                    placeholder="gated ๋ชจ๋ธ ์‚ฌ์šฉ์‹œ ํ•„์š” (hf_xxx...)",
                    type="password"
                )

                text_input = gr.Textbox(
                    label="ํ…์ŠคํŠธ",
                    placeholder="ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
                    lines=5
                )

                with gr.Row():
                    check_btn = gr.Button("๋ชจ๋ธ ์ ‘๊ทผ ํ™•์ธ", variant="secondary")
                    calculate_btn = gr.Button("ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ", variant="primary")

            with gr.Column():
                # Right column: shared result box for both actions.
                output = gr.Textbox(
                    label="๊ฒฐ๊ณผ",
                    lines=10,
                    show_copy_button=True
                )

        # Example models grouped by category (public vs. gated).
        with gr.Tabs():
            with gr.TabItem("๊ณต๊ฐœ ๋ชจ๋ธ (ํ† ํฐ ๋ถˆํ•„์š”)"):
                gr.Markdown("### ์ž์œ ๋กญ๊ฒŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ๋“ค:")
                with gr.Row():
                    public_models = [
                        "klue/bert-base",
                        "beomi/KcELECTRA-base",
                        "gpt2",
                        "microsoft/DialoGPT-medium"
                    ]

                    for model in public_models:
                        btn = gr.Button(model, size="sm")
                        # `x=model` binds the current name at definition time,
                        # avoiding the late-binding closure pitfall.
                        btn.click(lambda x=model: x, outputs=model_input)

            with gr.TabItem("์ œํ•œ๋œ ๋ชจ๋ธ (ํ† ํฐ ํ•„์š”)"):
                gr.Markdown("### ์ ‘๊ทผ ๊ถŒํ•œ์ด ํ•„์š”ํ•œ ๋ชจ๋ธ๋“ค:")
                gr.Markdown("โš ๏ธ ์ด ๋ชจ๋ธ๋“ค์€ ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค")
                with gr.Row():
                    gated_models = [
                        "meta-llama/Llama-2-7b-hf",
                        "google/gemma-7b",
                        "mistralai/Mistral-7B-v0.1"
                    ]

                    for model in gated_models:
                        btn = gr.Button(model, size="sm")
                        btn.click(lambda x=model: x, outputs=model_input)

        # Collapsible guide on creating and using a Hugging Face token.
        with gr.Accordion("๐Ÿ”‘ ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ ๊ฐ€์ด๋“œ", open=False):
            gr.Markdown("""
            ### ํ† ํฐ์ด ํ•„์š”ํ•œ ๊ฒฝ์šฐ:
            1. **Gated ๋ชจ๋ธ**: Meta Llama, Google Gemma ๋“ฑ
            2. **๋น„๊ณต๊ฐœ ๋ชจ๋ธ**: ๊ฐœ์ธ์ด๋‚˜ ์กฐ์ง์˜ private ๋ชจ๋ธ

            ### ํ† ํฐ ์ƒ์„ฑ ๋ฐฉ๋ฒ•:
            1. [ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ ํŽ˜์ด์ง€](https://huggingface.co/settings/tokens) ์ ‘์†
            2. "New token" ํด๋ฆญ
            3. "Read" ๊ถŒํ•œ์œผ๋กœ ํ† ํฐ ์ƒ์„ฑ
            4. ์ƒ์„ฑ๋œ ํ† ํฐ์„ ์œ„์˜ "ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ" ํ•„๋“œ์— ์ž…๋ ฅ

            ### ๋ชจ๋ธ ์ ‘๊ทผ ๊ถŒํ•œ ์š”์ฒญ:
            1. ์‚ฌ์šฉํ•˜๋ ค๋Š” ๋ชจ๋ธ ํŽ˜์ด์ง€ ๋ฐฉ๋ฌธ
            2. "Request access" ๋ฒ„ํŠผ ํด๋ฆญ
            3. ์Šน์ธ ํ›„ ํ† ํฐ๊ณผ ํ•จ๊ป˜ ์‚ฌ์šฉ
            """)

        # Event handlers: both buttons write into the same output box.
        check_btn.click(
            check_model_access,
            inputs=[model_input, token_input],
            outputs=output
        )

        calculate_btn.click(
            count_tokens,
            inputs=[model_input, text_input, token_input],
            outputs=output
        )

        # Also run the count when Enter is pressed in the text box.
        text_input.submit(
            count_tokens,
            inputs=[model_input, text_input, token_input],
            outputs=output
        )

    return demo
187
+
188
if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    demo = create_interface()
    demo.launch()