dev7halo commited on
Commit
1a69151
·
verified ·
1 Parent(s): 8e9646f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -94
app.py CHANGED
@@ -1,95 +1,190 @@
1
- import gradio as gr
2
- from transformers import AutoTokenizer
3
- import torch
4
-
5
- def count_tokens(model_name, text):
6
- """ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•˜๋Š” ํ•จ์ˆ˜"""
7
- try:
8
- if not model_name or not text:
9
- return "๋ชจ๋ธ๋ช…๊ณผ ํ…์ŠคํŠธ๋ฅผ ๋ชจ๋‘ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
10
-
11
- # ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
12
- tokenizer = AutoTokenizer.from_pretrained(model_name)
13
-
14
- # ํ† ํฐํ™”
15
- tokens = tokenizer.encode(text)
16
- token_count = len(tokens)
17
-
18
- # ํ† ํฐ ๋””์ฝ”๋”ฉ (์„ ํƒ์‚ฌํ•ญ - ํ† ํฐ๋“ค์„ ๋ณด์—ฌ์ฃผ๊ธฐ ์œ„ํ•ด)
19
- decoded_tokens = [tokenizer.decode([token]) for token in tokens]
20
-
21
- result = f"ํ† ํฐ ์ˆ˜: {token_count}\n\n"
22
- result += f"ํ† ํฐ๋“ค: {decoded_tokens[:50]}" # ์ฒ˜์Œ 50๊ฐœ๋งŒ ํ‘œ์‹œ
23
- if len(decoded_tokens) > 50:
24
- result += f"\n... (์ด {len(decoded_tokens)}๊ฐœ ํ† ํฐ ์ค‘ 50๊ฐœ๋งŒ ํ‘œ์‹œ)"
25
-
26
- return result
27
-
28
- except Exception as e:
29
- return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}\n\n๋ชจ๋ธ๋ช…์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”. ์˜ˆ: 'klue/bert-base', 'beomi/KcELECTRA-base'"
30
-
31
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
32
- def create_interface():
33
- with gr.Blocks(title="ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ", theme=gr.themes.Soft()) as demo:
34
- gr.Markdown("# ๐Ÿ”ข ํ—ˆ๊น…ํŽ˜์ด์Šค ๋ชจ๋ธ ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ")
35
- gr.Markdown("ํ—ˆ๊น…ํŽ˜์ด์Šค์— ์˜ฌ๋ผ์˜จ ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์‚ฌ์šฉํ•ด ํ…์ŠคํŠธ์˜ ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•ฉ๋‹ˆ๋‹ค.")
36
-
37
- with gr.Row():
38
- with gr.Column():
39
- model_input = gr.Textbox(
40
- label="๋ชจ๋ธ๋ช…",
41
- placeholder="์˜ˆ: klue/bert-base, beomi/KcELECTRA-base, gpt2",
42
- value="klue/bert-base"
43
- )
44
-
45
- text_input = gr.Textbox(
46
- label="ํ…์ŠคํŠธ",
47
- placeholder="ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
48
- lines=5
49
- )
50
-
51
- calculate_btn = gr.Button("ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ", variant="primary")
52
-
53
- with gr.Column():
54
- output = gr.Textbox(
55
- label="๊ฒฐ๊ณผ",
56
- lines=10,
57
- show_copy_button=True
58
- )
59
-
60
- # ์˜ˆ์‹œ ๋ฒ„ํŠผ๋“ค
61
- gr.Markdown("### ์ž์ฃผ ์‚ฌ์šฉ๋˜๋Š” ๋ชจ๋ธ ์˜ˆ์‹œ:")
62
- with gr.Row():
63
- example_models = [
64
- "klue/bert-base",
65
- "beomi/KcELECTRA-base",
66
- "gpt2",
67
- "microsoft/DialoGPT-medium"
68
- ]
69
-
70
- for model in example_models:
71
- btn = gr.Button(model, size="sm")
72
- btn.click(
73
- lambda x=model: x,
74
- outputs=model_input
75
- )
76
-
77
- # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
78
- calculate_btn.click(
79
- count_tokens,
80
- inputs=[model_input, text_input],
81
- outputs=output
82
- )
83
-
84
- # ์—”ํ„ฐํ‚ค๋กœ๋„ ์‹คํ–‰ ๊ฐ€๋Šฅํ•˜๊ฒŒ
85
- text_input.submit(
86
- count_tokens,
87
- inputs=[model_input, text_input],
88
- outputs=output
89
- )
90
-
91
- return demo
92
-
93
- if __name__ == "__main__":
94
- demo = create_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  demo.launch()
 
1
+ import gradio as gr
2
+ import os
3
+
4
def count_tokens(model_name, text, hf_token=None):
    """Count the tokens in *text* using the tokenizer of a Hugging Face model.

    Args:
        model_name: Hub id of the model whose tokenizer to load
            (e.g. "klue/bert-base").
        text: Text to tokenize.
        hf_token: Optional Hugging Face access token for gated/private models.

    Returns:
        A human-readable result string (token count plus a preview of the
        first 50 decoded tokens), or a localized error/help message when
        validation or tokenizer loading fails.
    """
    try:
        if not model_name or not text:
            return "๋ชจ๋ธ๋ช…๊ณผ ํ…์ŠคํŠธ๋ฅผ ๋ชจ๋‘ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."

        # Import transformers lazily so the UI can start even when the
        # (heavy) library import is slow or unavailable at startup.
        from transformers import AutoTokenizer

        # Load the tokenizer; pass the access token only when one was given.
        tokenizer_kwargs = {"trust_remote_code": True}
        if hf_token and hf_token.strip():
            tokenizer_kwargs["token"] = hf_token.strip()

        tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)

        # Tokenize the input.
        tokens = tokenizer.encode(text)
        token_count = len(tokens)

        # Decode each token individually for display purposes.
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        try:
            decoded_tokens = [tokenizer.decode([token]) for token in tokens]
        except Exception:
            decoded_tokens = ["ํ† ํฐ ๋””์ฝ”๋”ฉ ์‹คํŒจ"]

        result = f"โœ… ํ† ํฐ ์ˆ˜: {token_count}\n\n"
        result += f"ํ† ํฐ๋“ค: {decoded_tokens[:50]}"  # show only the first 50 tokens
        if len(decoded_tokens) > 50:
            result += f"\n... (์ด {len(decoded_tokens)}๊ฐœ ํ† ํฐ ์ค‘ 50๊ฐœ๋งŒ ํ‘œ์‹œ)"

        return result

    except Exception as e:
        # Compute str(e) once; it is reused by the branch checks below.
        error_text = str(e)
        error_msg = f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {error_text}\n\n"

        if "gated repo" in error_text:
            # Gated model: point the user at the access-request flow.
            error_msg += "๐Ÿ” ์ด ๋ชจ๋ธ์€ ์ ‘๊ทผ ๊ถŒํ•œ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค:\n"
            error_msg += f"1. https://huggingface.co/{model_name} ์—์„œ ์ ‘๊ทผ ๊ถŒํ•œ์„ ์š”์ฒญํ•˜์„ธ์š”\n"
            error_msg += "2. ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์„ ์ž…๋ ฅํ•˜์„ธ์š”\n"
            error_msg += "3. ํ† ํฐ ์ƒ์„ฑ: https://huggingface.co/settings/tokens\n\n"
        elif "does not exist" in error_text or "not found" in error_text:
            # Unknown repo id: suggest known-good public examples.
            error_msg += "๐Ÿ“ ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค:\n"
            error_msg += "1. ๋ชจ๋ธ๋ช…์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
            error_msg += "2. ๊ณต๊ฐœ ๋ชจ๋ธ ์˜ˆ์‹œ: 'klue/bert-base', 'beomi/KcELECTRA-base', 'gpt2'\n\n"
        else:
            # Generic fallback advice (network, typo, missing token).
            error_msg += "๐Ÿ”ง ๊ฐ€๋Šฅํ•œ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•:\n"
            error_msg += "1. ๋ชจ๋ธ๋ช…์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
            error_msg += "2. ๋„คํŠธ์›Œํฌ ์—ฐ๊ฒฐ์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
            error_msg += "3. ํ•„์š”์‹œ ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”\n"

        return error_msg
56
+
57
def check_model_access(model_name, hf_token=None):
    """Check whether the tokenizer of *model_name* can be loaded.

    Args:
        model_name: Hub id of the model to probe.
        hf_token: Optional Hugging Face access token for gated/private models.

    Returns:
        A localized status message describing success or the failure reason.
    """
    try:
        if not model_name:
            return "๋ชจ๋ธ๋ช…์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."

        from transformers import AutoTokenizer

        tokenizer_kwargs = {"trust_remote_code": True}
        if hf_token and hf_token.strip():
            tokenizer_kwargs["token"] = hf_token.strip()

        # Loading the tokenizer IS the access probe; the returned object is
        # not needed, so do not keep an unused local (was `tokenizer = ...`).
        AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
        return f"โœ… {model_name} ๋ชจ๋ธ ์ ‘๊ทผ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค!"

    except Exception as e:
        # Compute str(e) once for the branch checks.
        error_text = str(e)
        if "gated repo" in error_text:
            return f"๐Ÿ” {model_name}์€ ์ ‘๊ทผ ๊ถŒํ•œ์ด ํ•„์š”ํ•œ ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค. ํ† ํฐ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
        elif "does not exist" in error_text:
            return f"โŒ {model_name} ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
        else:
            return f"โŒ ์˜ค๋ฅ˜: {error_text}"
79
+
80
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
81
def create_interface():
    """Build and return the Gradio Blocks UI for the token-counter app.

    Wires the input widgets to count_tokens / check_model_access and adds
    example-model shortcut buttons plus a token how-to guide.
    """
    with gr.Blocks(title="ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# ๐Ÿ”ข ํ—ˆ๊น…ํŽ˜์ด์Šค ๋ชจ๋ธ ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ")
        gr.Markdown("ํ—ˆ๊น…ํŽ˜์ด์Šค์— ์˜ฌ๋ผ์˜จ ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์‚ฌ์šฉํ•ด ํ…์ŠคํŠธ์˜ ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•ฉ๋‹ˆ๋‹ค.")

        with gr.Row():
            with gr.Column():
                # Left column: user inputs.
                model_input = gr.Textbox(
                    label="๋ชจ๋ธ๋ช…",
                    placeholder="์˜ˆ: klue/bert-base, beomi/KcELECTRA-base, gpt2",
                    value="klue/bert-base"
                )

                # Optional HF access token; masked like a password field.
                token_input = gr.Textbox(
                    label="ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ (์„ ํƒ์‚ฌํ•ญ)",
                    placeholder="gated ๋ชจ๋ธ ์‚ฌ์šฉ์‹œ ํ•„์š” (hf_xxx...)",
                    type="password"
                )

                text_input = gr.Textbox(
                    label="ํ…์ŠคํŠธ",
                    placeholder="ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
                    lines=5
                )

                with gr.Row():
                    check_btn = gr.Button("๋ชจ๋ธ ์ ‘๊ทผ ํ™•์ธ", variant="secondary")
                    calculate_btn = gr.Button("ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ", variant="primary")

            with gr.Column():
                # Right column: shared result box for both actions.
                output = gr.Textbox(
                    label="๊ฒฐ๊ณผ",
                    lines=10,
                    show_copy_button=True
                )

        # Example models grouped by category (public vs. gated).
        with gr.Tabs():
            with gr.TabItem("๊ณต๊ฐœ ๋ชจ๋ธ (ํ† ํฐ ๋ถˆํ•„์š”)"):
                gr.Markdown("### ์ž์œ ๋กญ๊ฒŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ๋“ค:")
                with gr.Row():
                    public_models = [
                        "klue/bert-base",
                        "beomi/KcELECTRA-base",
                        "gpt2",
                        "microsoft/DialoGPT-medium"
                    ]

                    for model in public_models:
                        btn = gr.Button(model, size="sm")
                        # `x=model` binds the current name at definition time,
                        # avoiding the late-binding closure pitfall.
                        btn.click(lambda x=model: x, outputs=model_input)

            with gr.TabItem("์ œํ•œ๋œ ๋ชจ๋ธ (ํ† ํฐ ํ•„์š”)"):
                gr.Markdown("### ์ ‘๊ทผ ๊ถŒํ•œ์ด ํ•„์š”ํ•œ ๋ชจ๋ธ๋“ค:")
                gr.Markdown("โš ๏ธ ์ด ๋ชจ๋ธ๋“ค์€ ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค")
                with gr.Row():
                    gated_models = [
                        "meta-llama/Llama-2-7b-hf",
                        "google/gemma-7b",
                        "mistralai/Mistral-7B-v0.1"
                    ]

                    for model in gated_models:
                        btn = gr.Button(model, size="sm")
                        btn.click(lambda x=model: x, outputs=model_input)

        # Collapsible guide on creating and using a Hugging Face token.
        with gr.Accordion("๐Ÿ”‘ ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ ๊ฐ€์ด๋“œ", open=False):
            gr.Markdown("""
            ### ํ† ํฐ์ด ํ•„์š”ํ•œ ๊ฒฝ์šฐ:
            1. **Gated ๋ชจ๋ธ**: Meta Llama, Google Gemma ๋“ฑ
            2. **๋น„๊ณต๊ฐœ ๋ชจ๋ธ**: ๊ฐœ์ธ์ด๋‚˜ ์กฐ์ง์˜ private ๋ชจ๋ธ

            ### ํ† ํฐ ์ƒ์„ฑ ๋ฐฉ๋ฒ•:
            1. [ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ ํŽ˜์ด์ง€](https://huggingface.co/settings/tokens) ์ ‘์†
            2. "New token" ํด๋ฆญ
            3. "Read" ๊ถŒํ•œ์œผ๋กœ ํ† ํฐ ์ƒ์„ฑ
            4. ์ƒ์„ฑ๋œ ํ† ํฐ์„ ์œ„์˜ "ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ" ํ•„๋“œ์— ์ž…๋ ฅ

            ### ๋ชจ๋ธ ์ ‘๊ทผ ๊ถŒํ•œ ์š”์ฒญ:
            1. ์‚ฌ์šฉํ•˜๋ ค๋Š” ๋ชจ๋ธ ํŽ˜์ด์ง€ ๋ฐฉ๋ฌธ
            2. "Request access" ๋ฒ„ํŠผ ํด๋ฆญ
            3. ์Šน์ธ ํ›„ ํ† ํฐ๊ณผ ํ•จ๊ป˜ ์‚ฌ์šฉ
            """)

        # Event handlers: both buttons write into the same output box.
        check_btn.click(
            check_model_access,
            inputs=[model_input, token_input],
            outputs=output
        )

        calculate_btn.click(
            count_tokens,
            inputs=[model_input, text_input, token_input],
            outputs=output
        )

        # Also run the count when Enter is pressed in the text box.
        text_input.submit(
            count_tokens,
            inputs=[model_input, text_input, token_input],
            outputs=output
        )

    return demo
187
+
188
if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    demo = create_interface()
    demo.launch()