Spaces:

kimhyunwoo
/

bitnet

Running

kimhyunwoo committed on Apr 20

Commit

ece9655

verified ·

1 Parent(s): 4bf6a80

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,24 +12,26 @@ os.environ["TRANSFORMERS_VERBOSITY"] = "error"
 # AutoModelForCausalLM과 AutoTokenizer를 로드합니다.
 # BitNet 모델은 trust_remote_code=True가 필요합니다.
-# bf16은 메모리 사용량을 줄이고 속도를 향상시킬 수 있습니다 (GPU 지원 시).
-# CPU만 사용하는 경우 torch_dtype을 생략하거나 torch.float32로 설정할 수 있습니다.
 try:
     print(f"모델 로딩 중: {model_id}...")
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
     # GPU가 사용 가능하면 bf16 사용
     if torch.cuda.is_available():
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch.bfloat16,
             trust_remote_code=True
         ).to("cuda") # GPU로 모델 이동
         print("GPU를 사용하여 모델 로드 완료.")
     else:
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             trust_remote_code=True
         )
         print("CPU를 사용하여 모델 로드 완료. 성능이 느릴 수 있습니다.")
 except Exception as e:
@@ -86,7 +88,7 @@ if model is not None and tokenizer is not None:
     )
     # Gradio 앱 실행
-    # share=True를 하면 임시 공개 링크가 생성됩니다.
-    interface.launch(share=False)
 else:
     print("모델 로드 실패로 인해 Gradio 인터페이스를 실행할 수 없습니다.")

 # AutoModelForCausalLM과 AutoTokenizer를 로드합니다.
 # BitNet 모델은 trust_remote_code=True가 필요합니다.
+# GitHub 특정 브랜치에서 설치한 transformers를 사용합니다.
 try:
     print(f"모델 로딩 중: {model_id}...")
     # GPU가 사용 가능하면 bf16 사용
     if torch.cuda.is_available():
+        # torch_dtype을 명시적으로 설정하여 로드 오류 방지 시도
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch.bfloat16,
             trust_remote_code=True
         ).to("cuda") # GPU로 모델 이동
+        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
         print("GPU를 사용하여 모델 로드 완료.")
     else:
+        # CPU 사용 시 torch_dtype 생략 또는 float32
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             trust_remote_code=True
         )
+        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
         print("CPU를 사용하여 모델 로드 완료. 성능이 느릴 수 있습니다.")
 except Exception as e:
     )
     # Gradio 앱 실행
+    # Hugging Face Spaces에서는 앱이 자동으로 공개 호스팅되므로 share 옵션을 지정할 필요가 없습니다.
+    interface.launch()
 else:
     print("모델 로드 실패로 인해 Gradio 인터페이스를 실행할 수 없습니다.")