Spaces: Running on Zero
Upload app.py
app.py CHANGED
@@ -1,5 +1,5 @@
import gradio as gr
-import spaces
+# import spaces # Removed
import onnxruntime as ort
import numpy as np
from PIL import Image, ImageDraw, ImageFont
@@ -12,6 +12,8 @@ import matplotlib
from huggingface_hub import hf_hub_download
from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple
+import time
+import spaces

# Set the Matplotlib backend to Agg (for environments without a GUI)
matplotlib.use('Agg')
@@ -293,7 +295,8 @@ TAG_MAPPING_FILENAME = "cl_eva02_tagger_v1_250426/tag_mapping.json"
CACHE_DIR = "./model_cache"

# Global variables (cache the model and labels)
-onnx_session = None
+# onnx_session = None # Removed global session
+model_path_global = None # Store model path globally
labels_data = None
tag_to_category_map = None

@@ -319,111 +322,98 @@ def download_model_files():


def initialize_model():
-    """
-    global onnx_session, labels_data, tag_to_category_map
-
+    """Prepare the model files and label data (cached)"""
+    global model_path_global, labels_data, tag_to_category_map
+    # Only initialize once
+    if labels_data is None:
+        print("Downloading model files...") # Moved print here
    model_path, tag_mapping_path = download_model_files()
-
-
-    # --- Added Logging ---
-    print("--- Environment Check ---")
-    try:
-        import torch
-        print(f"PyTorch version: {torch.__version__}")
-        if torch.cuda.is_available():
-            print(f"PyTorch CUDA available: True")
-            print(f"PyTorch CUDA version: {torch.version.cuda}")
-            print(f"Detected GPU: {torch.cuda.get_device_name(0)}")
-            if torch.backends.cudnn.is_available():
-                print(f"PyTorch cuDNN available: True")
-                print(f"PyTorch cuDNN version: {torch.backends.cudnn.version()}")
-            else:
-                print("PyTorch cuDNN available: False")
-        else:
-            print("PyTorch CUDA available: False")
-    except ImportError:
-        print("PyTorch not found.")
-    except Exception as e:
-        print(f"Error during PyTorch check: {e}")
-
-    try:
-        print(f"ONNX Runtime build info: {ort.get_buildinfo()}")
-    except Exception as e:
-        print(f"Error getting ONNX Runtime build info: {e}")
-    print("-------------------------")
-    # --- End Added Logging ---
-
-    # Initialize the ONNX session (prefer GPU)
-    available_providers = ort.get_available_providers()
-    print(f"Available ONNX Runtime providers: {available_providers}")
-    providers = []
-    if 'CUDAExecutionProvider' in available_providers:
-        providers.append('CUDAExecutionProvider')
-    # elif 'DmlExecutionProvider' in available_providers: # DirectML (Windows)
-    #     providers.append('DmlExecutionProvider')
-    providers.append('CPUExecutionProvider') # Always include CPU as fallback
-
-    try:
-        onnx_session = ort.InferenceSession(model_path, providers=providers)
-        print(f"Using ONNX Runtime provider: {onnx_session.get_providers()[0]}")
-    except Exception as e:
-        print(f"Error initializing ONNX session with providers {providers}: {e}")
-        print("Falling back to CPUExecutionProvider only.")
-        onnx_session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
-
+        model_path_global = model_path # Store the path
+        print("Loading labels...")
    labels_data, _, tag_to_category_map = load_tag_mapping(tag_mapping_path)
-    print("
+        print("Labels loaded.")
+    # --- Removed ONNX Session Initialization ---

@spaces.GPU()
def predict(image_input, gen_threshold, char_threshold, output_mode):
-    print("--- predict function started ---")
-    """Prediction function for the Gradio interface"""
-    initialize_model() #
+    print("--- predict function started (GPU worker) ---")
+    """Prediction function for the Gradio interface (inside the GPU worker)"""
+    initialize_model() # Ensure files/labels are ready
+
+    # --- Create ONNX session inside the GPU function ---
+    print("Creating ONNX session for prediction...")
+    global model_path_global # Access the global model path
+    if model_path_global is None:
+        # Attempt initialization again if model path is missing (e.g., after restart)
+        initialize_model()
+        if model_path_global is None:
+            return "Error: Model path could not be initialized.", None
+
+    available_providers = ort.get_available_providers()
+    print(f"(Worker) Available ONNX Runtime providers: {available_providers}")
+    providers = []
+    if 'CUDAExecutionProvider' in available_providers:
+        providers.append('CUDAExecutionProvider')
+    providers.append('CPUExecutionProvider') # Always include CPU as fallback
+
+    try:
+        # Create session with GPU preference inside the worker
+        session = ort.InferenceSession(model_path_global, providers=providers)
+        print(f"(Worker) Using ONNX Runtime provider: {session.get_providers()[0]}")
+    except Exception as e:
+        print(f"(Worker) Error initializing ONNX session with providers {providers}: {e}")
+        # Fallback explicitly to CPU if GPU fails inside worker
+        try:
+            print("(Worker) Falling back to CPUExecutionProvider only.")
+            session = ort.InferenceSession(model_path_global, providers=['CPUExecutionProvider'])
+        except Exception as e_cpu:
+            print(f"(Worker) Error initializing ONNX session even with CPU: {e_cpu}")
+            return f"Error initializing ONNX session: {e_cpu}", None
+    # --- Session created ---

    if image_input is None:
        return "Please upload an image.", None

-    print(f"Processing image with thresholds: gen={gen_threshold}, char={char_threshold}")
+    print(f"(Worker) Processing image with thresholds: gen={gen_threshold}, char={char_threshold}")

    # Make sure the input is a PIL Image object
    if not isinstance(image_input, Image.Image):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            # If it is a URL
+            if isinstance(image_input, str) and image_input.startswith("http"):
+                response = requests.get(image_input)
+                response.raise_for_status()
+                image = Image.open(io.BytesIO(response.content))
+            # If it is a file path (does not normally happen with Gradio, but just in case)
+            elif isinstance(image_input, str) and os.path.exists(image_input):
+                image = Image.open(image_input)
+            # If it is a NumPy array (input from the Gradio Image component)
+            elif isinstance(image_input, np.ndarray):
+                image = Image.fromarray(image_input)
+            else:
+                raise ValueError("Unsupported image input type")
+        except Exception as e:
+            print(f"(Worker) Error loading image: {e}")
+            return f"Error loading image: {e}", None
    else:
        image = image_input

-
    # Preprocessing
    original_pil_image, input_data = preprocess_image(image)

    # Match the data type to what the model expects (usually float32)
-    input_name = onnx_session.get_inputs()[0].name
-    expected_type = onnx_session.get_inputs()[0].type
+    input_name = session.get_inputs()[0].name
+    expected_type = session.get_inputs()[0].type
    if expected_type == 'tensor(float16)':
        input_data = input_data.astype(np.float16)
    else:
        input_data = input_data.astype(np.float32) # Default to float32

-    # Inference
+    # Inference (using the session created above)
    start_time = time.time()
-    outputs = onnx_session.run(None, {input_name: input_data})[0]
+    outputs = session.run(None, {input_name: input_data})[0]
    inference_time = time.time() - start_time
-    print(f"Inference completed in {inference_time:.3f} seconds")
+    print(f"(Worker) Inference completed in {inference_time:.3f} seconds")

    # Convert to probabilities with the sigmoid function
    probs = 1 / (1 + np.exp(-outputs[0])) # Apply sigmoid to the first batch item
@@ -437,12 +427,12 @@ def predict(image_input, gen_threshold, char_threshold, output_mode):
    if predictions["rating"]:
        output_tags.append(predictions["rating"][0][0].replace("_", " "))
    if predictions["quality"]:
-
+        output_tags.append(predictions["quality"][0][0].replace("_", " "))

    # Add the remaining categories in alphabetical order (optional)
    for category in ["artist", "character", "copyright", "general", "meta"]:
        tags = [tag.replace("_", " ") for tag, prob in predictions[category]
-
+                if not (category == "meta" and any(p in tag.lower() for p in ['id', 'commentary','mismatch']))] # Meta tag filtering
        output_tags.extend(tags)

    output_text = ", ".join(output_tags)
@@ -454,7 +444,6 @@ def predict(image_input, gen_threshold, char_threshold, output_mode):
    return output_text, viz_image

# --- Gradio Interface Definition ---
-import time

# CSS for styling
css = """
@@ -594,6 +583,5 @@ if __name__ == "__main__":
    # Warn if the HF_TOKEN environment variable is not set (for private repositories)
    if not os.environ.get("HF_TOKEN"):
        print("Warning: HF_TOKEN environment variable not set. Downloads from private repositories may fail.")
-    #
-    initialize_model() # Removed startup initialization
+    # initialize_model() # Removed startup initialization (model loaded in predict)
    demo.launch(share=True)
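
The core of the commit is where the ONNX session lives: on a ZeroGPU Space the GPU is only attached while a function decorated with @spaces.GPU() is running, so a CUDA-backed resource created at import time would be built in a process with no GPU. A stripped-down sketch of the decorator pattern (the predict body is a stub, not the Space's real logic):

import gradio as gr
import spaces  # ZeroGPU helper package, available on Hugging Face Spaces

@spaces.GPU()  # body runs in a GPU-attached worker, not the main process
def predict(text: str) -> str:
    # Create GPU-touching resources here, inside the worker.
    return f"processed: {text}"

demo = gr.Interface(fn=predict, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()

The visible trade-off in the diff is that the ONNX session is now rebuilt on every predict() call; since ZeroGPU workers are short-lived, caching the session would buy little.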
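
ONNX Runtime treats the providers list as an ordered preference: InferenceSession tries each entry in turn, so appending CPUExecutionProvider last means a missing or broken CUDA setup degrades to CPU instead of failing outright. A sketch of that selection logic, assuming a local model.onnx:

import onnxruntime as ort

def create_session(model_path: str) -> ort.InferenceSession:
    # Prefer CUDA when the runtime reports it; always keep CPU as a fallback.
    providers = []
    if 'CUDAExecutionProvider' in ort.get_available_providers():
        providers.append('CUDAExecutionProvider')
    providers.append('CPUExecutionProvider')
    session = ort.InferenceSession(model_path, providers=providers)
    # The first entry of get_providers() is the provider actually in use.
    print("Active providers:", session.get_providers())
    return session

# session = create_session("model.onnx")  # path is illustrative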
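
The dtype cast works because session.get_inputs() exposes the model's input metadata: .name is the graph input name and .type is an ONNX type string such as 'tensor(float)' (float32) or 'tensor(float16)'. A small sketch, with an illustrative input shape:

import numpy as np
import onnxruntime as ort

def prepare_feed(session: ort.InferenceSession, batch: np.ndarray) -> dict:
    # Cast the batch to whatever element type the exported model expects.
    meta = session.get_inputs()[0]
    if meta.type == 'tensor(float16)':
        batch = batch.astype(np.float16)
    else:
        batch = batch.astype(np.float32)  # 'tensor(float)' means float32
    return {meta.name: batch}

# feed = prepare_feed(session, np.zeros((1, 3, 448, 448)))  # shape is illustrative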
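
A multi-label tagger emits one independent logit per tag, so the code applies an element-wise sigmoid rather than a softmax, then keeps the tags whose probability clears a per-category threshold. A self-contained sketch with made-up tags:

import numpy as np

def logits_to_tags(logits, names, threshold):
    """Element-wise sigmoid, then keep tags above the threshold."""
    probs = 1.0 / (1.0 + np.exp(-np.asarray(logits, dtype=np.float64)))
    return [(name, float(p)) for name, p in zip(names, probs) if p >= threshold]

print(logits_to_tags([2.0, -1.0, 0.3], ["cat", "dog", "sky"], 0.6))
# -> [('cat', 0.8807...)]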