openfree committed on
Commit
9d13074
·
verified ·
1 Parent(s): 3c39492

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -48
app.py CHANGED
@@ -35,7 +35,7 @@ import PyPDF2
35
 
36
  warnings.filterwarnings('ignore')
37
 
38
- print("๐ŸŽฎ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” (Gemma3-R1984-4B GGUF Q4_K_M)...")
39
 
40
  ##############################################################################
41
  # ์ƒ์ˆ˜ ์ •์˜
@@ -50,7 +50,7 @@ SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
50
  ##############################################################################
51
  llm = None
52
  model_loaded = False
53
- model_name = "Gemma3-R1984-4B-Q4_K_M"
54
 
55
  ##############################################################################
56
  # ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
@@ -215,34 +215,73 @@ def image_to_base64_data_uri(image: Union[np.ndarray, Image.Image]) -> str:
215
  ##############################################################################
216
  def download_model_files():
217
  """Hugging Face Hub์—์„œ ๋ชจ๋ธ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ"""
218
- model_repo = "VIDraft/Gemma-3-R1984-4B-GGUF"
219
-
220
- # ๋ฉ”์ธ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
221
- model_filename = "Gemma-3-R1984-4B.Q4_K_M.gguf"
222
- logger.info(f"๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘: {model_filename}")
223
-
224
- model_path = hf_hub_download(
225
- repo_id=model_repo,
226
- filename=model_filename,
227
- resume_download=True,
228
- local_files_only=False
229
- )
230
-
231
- # Vision projection ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
232
- mmproj_filename = "Gemma-3-R1984-4B.mmproj-Q8_0.gguf"
233
- logger.info(f"Vision ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘: {mmproj_filename}")
234
-
235
- mmproj_path = hf_hub_download(
236
- repo_id=model_repo,
237
- filename=mmproj_filename,
238
- resume_download=True,
239
- local_files_only=False
240
- )
241
 
242
- logger.info(f"๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path}")
243
- logger.info(f"Vision ๊ฒฝ๋กœ: {mmproj_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- return model_path, mmproj_path
 
246
 
247
  @spaces.GPU(duration=120)
248
  def load_model():
@@ -253,7 +292,7 @@ def load_model():
253
  return True
254
 
255
  try:
256
- logger.info("Gemma3-R1984-4B GGUF Q4_K_M ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
257
  clear_cuda_cache()
258
 
259
  # ๋ชจ๋ธ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
@@ -262,26 +301,40 @@ def load_model():
262
  # GPU ์‚ฌ์šฉ ๊ฐ€๋Šฅ ์—ฌ๋ถ€ ํ™•์ธ
263
  n_gpu_layers = -1 if torch.cuda.is_available() else 0
264
 
265
- # ์ฑ„ํŒ… ํ•ธ๋“ค๋Ÿฌ ์ƒ์„ฑ (๋น„์ „ ์ง€์›)
266
- chat_handler = Llava16ChatHandler(
267
- clip_model_path=mmproj_path,
268
- verbose=False
269
- )
 
 
 
 
 
 
 
270
 
271
  # ๋ชจ๋ธ ๋กœ๋“œ
272
- llm = Llama(
273
- model_path=model_path,
274
- chat_handler=chat_handler,
275
- n_ctx=4096, # ์ปจํ…์ŠคํŠธ ํฌ๊ธฐ
276
- n_gpu_layers=n_gpu_layers, # GPU ๋ ˆ์ด์–ด
277
- n_threads=8, # CPU ์Šค๋ ˆ๋“œ
278
- verbose=False,
279
- seed=42,
280
- logits_all=True, # ๋น„์ „ ๋ชจ๋ธ์— ํ•„์š”
281
- )
 
 
 
 
 
282
 
283
  model_loaded = True
284
- logger.info(f"โœ… {model_name} ๋กœ๋”ฉ ์™„๋ฃŒ!")
 
 
285
  return True
286
 
287
  except Exception as e:
@@ -342,6 +395,31 @@ def analyze_image_for_robot(
342
  return "โŒ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ"
343
 
344
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  # ์ด๋ฏธ์ง€๋ฅผ base64๋กœ ๋ณ€ํ™˜
346
  image_uri = image_to_base64_data_uri(image)
347
 
@@ -537,14 +615,14 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B GGUF)", css=css) a
537
  gr.HTML("""
538
  <div class="robot-header">
539
  <h1>๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ</h1>
540
- <h3>๐ŸŽฎ Gemma3-R1984-4B GGUF Q4_K_M + ๐Ÿ“ท ์‹ค์‹œ๊ฐ„ ์›น์บ  + ๐Ÿ” ์›น ๊ฒ€์ƒ‰</h3>
541
  <p>โšก ์–‘์žํ™” ๋ชจ๋ธ๋กœ ๋” ๋น ๋ฅด๊ณ  ํšจ์œจ์ ์ธ ๋กœ๋ด‡ ์ž‘์—… ๋ถ„์„!</p>
542
  </div>
543
  """)
544
 
545
  gr.HTML("""
546
  <div class="model-info">
547
- <strong>๋ชจ๋ธ:</strong> Gemma3-R1984-4B Q4_K_M (2.49GB) | <strong>๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ:</strong> ~3-4GB VRAM
548
  </div>
549
  """)
550
 
@@ -851,7 +929,7 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B GGUF)", css=css) a
851
  )
852
 
853
  if __name__ == "__main__":
854
- print("๐Ÿš€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์‹œ์ž‘ (Gemma3-R1984-4B GGUF Q4_K_M)...")
855
  demo.launch(
856
  server_name="0.0.0.0",
857
  server_port=7860,
 
35
 
36
  warnings.filterwarnings('ignore')
37
 
38
+ print("๐ŸŽฎ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” (Gemma3-4B GGUF Q4_K_M)...")
39
 
40
  ##############################################################################
41
  # ์ƒ์ˆ˜ ์ •์˜
 
50
  ##############################################################################
51
  llm = None
52
  model_loaded = False
53
+ model_name = "Gemma3-4B-GGUF-Q4_K_M"
54
 
55
  ##############################################################################
56
  # ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
 
215
  ##############################################################################
216
  def download_model_files():
217
  """Hugging Face Hub์—์„œ ๋ชจ๋ธ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ"""
218
+ # ์—ฌ๋Ÿฌ ๊ฐ€๋Šฅํ•œ ์ €์žฅ์†Œ ์‹œ๋„
219
+ model_repos = [
220
+ # ์ฒซ ๋ฒˆ์งธ ์‹œ๋„: ์ผ๋ฐ˜์ ์ธ Gemma 3 4B GGUF
221
+ {
222
+ "repo": "Mungert/gemma-3-4b-it-gguf",
223
+ "model": "google_gemma-3-4b-it-q4_k_m.gguf",
224
+ "mmproj": "google_gemma-3-4b-it-mmproj-bf16.gguf"
225
+ },
226
+ # ๋‘ ๋ฒˆ์งธ ์‹œ๋„: LM Studio ๋ฒ„์ „
227
+ {
228
+ "repo": "lmstudio-community/gemma-3-4b-it-GGUF",
229
+ "model": "gemma-3-4b-it-Q4_K_M.gguf",
230
+ "mmproj": "gemma-3-4b-it-mmproj-f16.gguf"
231
+ },
232
+ # ์„ธ ๋ฒˆ์งธ ์‹œ๋„: unsloth ๋ฒ„์ „
233
+ {
234
+ "repo": "unsloth/gemma-3-4b-it-GGUF",
235
+ "model": "gemma-3-4b-it.Q4_K_M.gguf",
236
+ "mmproj": "gemma-3-4b-it.mmproj.gguf"
237
+ }
238
+ ]
 
 
239
 
240
+ for repo_info in model_repos:
241
+ try:
242
+ logger.info(f"์ €์žฅ์†Œ ์‹œ๋„: {repo_info['repo']}")
243
+
244
+ # ๋ฉ”์ธ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
245
+ model_filename = repo_info["model"]
246
+ logger.info(f"๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘: {model_filename}")
247
+
248
+ model_path = hf_hub_download(
249
+ repo_id=repo_info["repo"],
250
+ filename=model_filename,
251
+ resume_download=True,
252
+ local_files_only=False
253
+ )
254
+
255
+ # Vision projection ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
256
+ mmproj_filename = repo_info["mmproj"]
257
+ logger.info(f"Vision ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘: {mmproj_filename}")
258
+
259
+ try:
260
+ mmproj_path = hf_hub_download(
261
+ repo_id=repo_info["repo"],
262
+ filename=mmproj_filename,
263
+ resume_download=True,
264
+ local_files_only=False
265
+ )
266
+ except:
267
+ # mmproj ํŒŒ์ผ์ด ์—†์„ ์ˆ˜๋„ ์žˆ์Œ
268
+ logger.warning(f"Vision ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {mmproj_filename}")
269
+ logger.warning("ํ…์ŠคํŠธ ์ „์šฉ ๋ชจ๋“œ๋กœ ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค.")
270
+ mmproj_path = None
271
+
272
+ logger.info(f"โœ… ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")
273
+ logger.info(f"๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path}")
274
+ if mmproj_path:
275
+ logger.info(f"Vision ๊ฒฝ๋กœ: {mmproj_path}")
276
+
277
+ return model_path, mmproj_path
278
+
279
+ except Exception as e:
280
+ logger.error(f"์ €์žฅ์†Œ {repo_info['repo']} ์‹œ๋„ ์‹คํŒจ: {e}")
281
+ continue
282
 
283
+ # ๋ชจ๋“  ์‹œ๋„๊ฐ€ ์‹คํŒจํ•œ ๊ฒฝ์šฐ
284
+ raise Exception("์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ GGUF ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ์„ ํ™•์ธํ•˜์„ธ์š”.")
285
 
286
  @spaces.GPU(duration=120)
287
  def load_model():
 
292
  return True
293
 
294
  try:
295
+ logger.info("Gemma3-4B GGUF Q4_K_M ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
296
  clear_cuda_cache()
297
 
298
  # ๋ชจ๋ธ ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
 
301
  # GPU ์‚ฌ์šฉ ๊ฐ€๋Šฅ ์—ฌ๋ถ€ ํ™•์ธ
302
  n_gpu_layers = -1 if torch.cuda.is_available() else 0
303
 
304
+ # ์ฑ„ํŒ… ํ•ธ๋“ค๋Ÿฌ ์ƒ์„ฑ (๋น„์ „ ์ง€์› - mmproj๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ๋งŒ)
305
+ chat_handler = None
306
+ if mmproj_path:
307
+ try:
308
+ chat_handler = Llava16ChatHandler(
309
+ clip_model_path=mmproj_path,
310
+ verbose=False
311
+ )
312
+ logger.info("โœ… Vision ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต")
313
+ except Exception as e:
314
+ logger.warning(f"Vision ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ, ํ…์ŠคํŠธ ์ „์šฉ ๋ชจ๋“œ๋กœ ์ „ํ™˜: {e}")
315
+ chat_handler = None
316
 
317
  # ๋ชจ๋ธ ๋กœ๋“œ
318
+ llm_params = {
319
+ "model_path": model_path,
320
+ "n_ctx": 4096, # ์ปจํ…์ŠคํŠธ ํฌ๊ธฐ
321
+ "n_gpu_layers": n_gpu_layers, # GPU ๋ ˆ์ด์–ด
322
+ "n_threads": 8, # CPU ์Šค๋ ˆ๋“œ
323
+ "verbose": False,
324
+ "seed": 42,
325
+ }
326
+
327
+ # chat_handler๊ฐ€ ์žˆ์œผ๋ฉด ์ถ”๊ฐ€
328
+ if chat_handler:
329
+ llm_params["chat_handler"] = chat_handler
330
+ llm_params["logits_all"] = True # ๋น„์ „ ๋ชจ๋ธ์— ํ•„์š”
331
+
332
+ llm = Llama(**llm_params)
333
 
334
  model_loaded = True
335
+ logger.info(f"โœ… Gemma3-4B ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
336
+ if not chat_handler:
337
+ logger.warning("โš ๏ธ ํ…์ŠคํŠธ ์ „์šฉ ๋ชจ๋“œ๋กœ ์‹คํ–‰ ์ค‘์ž…๋‹ˆ๋‹ค. ์ด๋ฏธ์ง€ ๋ถ„์„์ด ์ œํ•œ๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")
338
  return True
339
 
340
  except Exception as e:
 
395
  return "โŒ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ"
396
 
397
  try:
398
+ # Vision ๋ชจ๋ธ์ด ์—†๋Š” ๊ฒฝ์šฐ ๊ฒฝ๊ณ 
399
+ if not hasattr(llm, 'chat_handler') or llm.chat_handler is None:
400
+ logger.warning("Vision ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ๋ถ„์„๋งŒ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.")
401
+
402
+ # ํ…์ŠคํŠธ ์ „์šฉ ๋ถ„์„
403
+ system_prompt = f"""๋‹น์‹ ์€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์‹œ๋ฎฌ๋ ˆ์ดํ„ฐ์ž…๋‹ˆ๋‹ค.
404
+ ์‹ค์ œ ์ด๋ฏธ์ง€๋ฅผ ๋ณผ ์ˆ˜๋Š” ์—†์ง€๋งŒ, ์‚ฌ์šฉ์ž์˜ ์„ค๋ช…์„ ๋ฐ”ํƒ•์œผ๋กœ ๋กœ๋ด‡ ์ž‘์—…์„ ๊ณ„ํšํ•˜๊ณ  ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.
405
+ ํƒœ์Šคํฌ ์œ ํ˜•: {task_type}"""
406
+
407
+ messages = [
408
+ {"role": "system", "content": system_prompt},
409
+ {"role": "user", "content": f"[์ด๋ฏธ์ง€ ๋ถ„์„ ์š”์ฒญ] {prompt}"}
410
+ ]
411
+
412
+ response = llm.create_chat_completion(
413
+ messages=messages,
414
+ max_tokens=max_new_tokens,
415
+ temperature=0.7,
416
+ top_p=0.9,
417
+ stream=False
418
+ )
419
+
420
+ result = response['choices'][0]['message']['content'].strip()
421
+ return f"โš ๏ธ ํ…์ŠคํŠธ ์ „์šฉ ๋ชจ๋“œ\n\n{result}"
422
+
423
  # ์ด๋ฏธ์ง€๋ฅผ base64๋กœ ๋ณ€ํ™˜
424
  image_uri = image_to_base64_data_uri(image)
425
 
 
615
  gr.HTML("""
616
  <div class="robot-header">
617
  <h1>๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ</h1>
618
+ <h3>๐ŸŽฎ Gemma3-4B GGUF Q4_K_M + ๐Ÿ“ท ์‹ค์‹œ๊ฐ„ ์›น์บ  + ๐Ÿ” ์›น ๊ฒ€์ƒ‰</h3>
619
  <p>โšก ์–‘์žํ™” ๋ชจ๋ธ๋กœ ๋” ๋น ๋ฅด๊ณ  ํšจ์œจ์ ์ธ ๋กœ๋ด‡ ์ž‘์—… ๋ถ„์„!</p>
620
  </div>
621
  """)
622
 
623
  gr.HTML("""
624
  <div class="model-info">
625
+ <strong>๋ชจ๋ธ:</strong> Gemma3-4B Q4_K_M (2.5GB) | <strong>๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ:</strong> ~3-4GB VRAM
626
  </div>
627
  """)
628
 
 
929
  )
930
 
931
  if __name__ == "__main__":
932
+ print("๐Ÿš€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ ์‹œ์ž‘ (Gemma3-4B GGUF Q4_K_M)...")
933
  demo.launch(
934
  server_name="0.0.0.0",
935
  server_port=7860,