Hashii1729 committed on
Commit
c96ca80
·
1 Parent(s): 384c4a0

Enhance HuggingFaceFashionAnalyzer: optimize model loading, suppress warnings, and improve CPU performance settings

Browse files
Files changed (1) hide show
  1. fast.py +71 -11
fast.py CHANGED
@@ -15,14 +15,28 @@ import torch.nn.functional as F
15
  import torchvision.transforms as v2
16
  from huggingface_hub import PyTorchModelHubMixin
17
  import numpy as np
 
 
 
 
 
 
 
18
 
19
  app = FastAPI(title="HuggingFace Fashion Analyzer API", version="1.0.0")
20
 
21
  # Fashion Image Encoder class for yainage90 model
22
  class ImageEncoder(nn.Module, PyTorchModelHubMixin):
23
- def __init__(self, config):
24
  super(ImageEncoder, self).__init__()
25
- self.swin = SwinModel(config=config)
 
 
 
 
 
 
 
26
  self.embedding_layer = nn.Linear(config.hidden_size, 128)
27
 
28
  def forward(self, image_tensor):
@@ -39,11 +53,27 @@ class HuggingFaceFashionAnalyzer:
39
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
40
  print(f"Using device: {self.device}")
41
 
 
 
 
 
 
42
  # Initialize yainage90 fashion object detection model
43
  try:
44
  self.detection_ckpt = 'yainage90/fashion-object-detection'
45
- self.detection_processor = AutoImageProcessor.from_pretrained(self.detection_ckpt)
46
- self.detection_model = AutoModelForObjectDetection.from_pretrained(self.detection_ckpt).to(self.device)
 
 
 
 
 
 
 
 
 
 
 
47
  print("Fashion object detection model loaded successfully")
48
  except Exception as e:
49
  print(f"Error loading fashion detection model: {e}")
@@ -54,10 +84,21 @@ class HuggingFaceFashionAnalyzer:
54
  try:
55
  self.encoder_ckpt = "yainage90/fashion-image-feature-extractor"
56
  self.encoder_config = SwinConfig.from_pretrained(self.encoder_ckpt)
57
- self.encoder_image_processor = AutoImageProcessor.from_pretrained(self.encoder_ckpt)
 
 
 
 
58
 
59
  # Create the encoder with proper configuration - use from_pretrained directly
60
- self.feature_encoder = ImageEncoder.from_pretrained(self.encoder_ckpt).to(self.device)
 
 
 
 
 
 
 
61
 
62
  # Setup image transforms for feature extraction
63
  self.transform = v2.Compose([
@@ -71,12 +112,21 @@ class HuggingFaceFashionAnalyzer:
71
  self.feature_encoder = None
72
  self.transform = None
73
 
74
- # Initialize basic image captioning as fallback
75
  try:
 
 
 
 
 
 
 
 
76
  self.image_to_text = pipeline(
77
  "image-to-text",
78
  model="Salesforce/blip-image-captioning-base",
79
- device=0 if torch.cuda.is_available() else -1
 
80
  )
81
  print("Basic image captioning model loaded successfully")
82
  except Exception as e:
@@ -88,6 +138,12 @@ class HuggingFaceFashionAnalyzer:
88
  0: 'bag', 1: 'bottom', 2: 'dress', 3: 'hat', 4: 'shoes', 5: 'outer', 6: 'top'
89
  }
90
 
 
 
 
 
 
 
91
  def process_image_from_bytes(self, image_bytes):
92
  """Process image bytes and return PIL Image"""
93
  image = Image.open(io.BytesIO(image_bytes))
@@ -412,9 +468,12 @@ class HuggingFaceFashionAnalyzer:
412
  return {"error": "Fashion detection model not available"}
413
 
414
  try:
415
- with torch.no_grad():
 
416
  inputs = self.detection_processor(images=[image], return_tensors="pt")
417
- outputs = self.detection_model(**inputs.to(self.device))
 
 
418
  target_sizes = torch.tensor([[image.size[1], image.size[0]]])
419
  results = self.detection_processor.post_process_object_detection(
420
  outputs, threshold=0.4, target_sizes=target_sizes
@@ -445,7 +504,8 @@ class HuggingFaceFashionAnalyzer:
445
  # Transform image for feature extraction
446
  image_tensor = self.transform(image)
447
 
448
- with torch.no_grad():
 
449
  embedding = self.feature_encoder(image_tensor.unsqueeze(0).to(self.device))
450
 
451
  return {
 
15
  import torchvision.transforms as v2
16
  from huggingface_hub import PyTorchModelHubMixin
17
  import numpy as np
18
+ import warnings
19
+
20
+ # Suppress specific warnings for cleaner output
21
+ warnings.filterwarnings("ignore", message=".*use_fast.*")
22
+ warnings.filterwarnings("ignore", message=".*copying from a non-meta parameter.*")
23
+ warnings.filterwarnings("ignore", message=".*slow image processor.*")
24
+ warnings.filterwarnings("ignore", message=".*slow processor.*")
25
 
26
  app = FastAPI(title="HuggingFace Fashion Analyzer API", version="1.0.0")
27
 
28
  # Fashion Image Encoder class for yainage90 model
29
  class ImageEncoder(nn.Module, PyTorchModelHubMixin):
30
+ def __init__(self, config=None):
31
  super(ImageEncoder, self).__init__()
32
+ if config is None:
33
+ # Create a default config if none provided
34
+ config = SwinConfig()
35
+ elif isinstance(config, dict):
36
+ # Convert dict to SwinConfig if needed
37
+ config = SwinConfig(**config)
38
+
39
+ self.swin = SwinModel(config)
40
  self.embedding_layer = nn.Linear(config.hidden_size, 128)
41
 
42
  def forward(self, image_tensor):
 
53
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
54
  print(f"Using device: {self.device}")
55
 
56
+ # Set CPU optimization settings
57
+ if self.device == "cpu":
58
+ torch.set_num_threads(2) # Limit CPU threads to reduce load
59
+ print("CPU optimization: Limited threads to 2 for better performance")
60
+
61
  # Initialize yainage90 fashion object detection model
62
  try:
63
  self.detection_ckpt = 'yainage90/fashion-object-detection'
64
+ # Use fast processor to avoid warnings
65
+ self.detection_processor = AutoImageProcessor.from_pretrained(
66
+ self.detection_ckpt,
67
+ use_fast=True
68
+ )
69
+ # Load model with proper parameter assignment to avoid warnings
70
+ with warnings.catch_warnings():
71
+ warnings.simplefilter("ignore")
72
+ self.detection_model = AutoModelForObjectDetection.from_pretrained(
73
+ self.detection_ckpt,
74
+ torch_dtype=torch.float32 if self.device == "cpu" else torch.float16,
75
+ low_cpu_mem_usage=True if self.device == "cpu" else False
76
+ ).to(self.device)
77
  print("Fashion object detection model loaded successfully")
78
  except Exception as e:
79
  print(f"Error loading fashion detection model: {e}")
 
84
  try:
85
  self.encoder_ckpt = "yainage90/fashion-image-feature-extractor"
86
  self.encoder_config = SwinConfig.from_pretrained(self.encoder_ckpt)
87
+ # Use fast processor to avoid warnings
88
+ self.encoder_image_processor = AutoImageProcessor.from_pretrained(
89
+ self.encoder_ckpt,
90
+ use_fast=True
91
+ )
92
 
93
  # Create the encoder with proper configuration - use from_pretrained directly
94
+ with warnings.catch_warnings():
95
+ warnings.simplefilter("ignore")
96
+ self.feature_encoder = ImageEncoder.from_pretrained(self.encoder_ckpt).to(self.device)
97
+ # Set appropriate dtype after loading
98
+ if self.device == "cpu":
99
+ self.feature_encoder = self.feature_encoder.float()
100
+ else:
101
+ self.feature_encoder = self.feature_encoder.half()
102
 
103
  # Setup image transforms for feature extraction
104
  self.transform = v2.Compose([
 
112
  self.feature_encoder = None
113
  self.transform = None
114
 
115
+ # Initialize basic image captioning as fallback with CPU optimization
116
  try:
117
+ # Configure model kwargs for CPU optimization
118
+ model_kwargs = {}
119
+ if self.device == "cpu":
120
+ model_kwargs["low_cpu_mem_usage"] = True
121
+ model_kwargs["torch_dtype"] = torch.float32
122
+ else:
123
+ model_kwargs["torch_dtype"] = torch.float16
124
+
125
  self.image_to_text = pipeline(
126
  "image-to-text",
127
  model="Salesforce/blip-image-captioning-base",
128
+ device=0 if torch.cuda.is_available() else -1,
129
+ model_kwargs=model_kwargs
130
  )
131
  print("Basic image captioning model loaded successfully")
132
  except Exception as e:
 
138
  0: 'bag', 1: 'bottom', 2: 'dress', 3: 'hat', 4: 'shoes', 5: 'outer', 6: 'top'
139
  }
140
 
141
+ # Set models to evaluation mode for inference optimization
142
+ if self.detection_model:
143
+ self.detection_model.eval()
144
+ if self.feature_encoder:
145
+ self.feature_encoder.eval()
146
+
147
  def process_image_from_bytes(self, image_bytes):
148
  """Process image bytes and return PIL Image"""
149
  image = Image.open(io.BytesIO(image_bytes))
 
468
  return {"error": "Fashion detection model not available"}
469
 
470
  try:
471
+ # Use inference mode for better performance
472
+ with torch.inference_mode():
473
  inputs = self.detection_processor(images=[image], return_tensors="pt")
474
+ # Move inputs to device efficiently
475
+ inputs = {k: v.to(self.device) for k, v in inputs.items()}
476
+ outputs = self.detection_model(**inputs)
477
  target_sizes = torch.tensor([[image.size[1], image.size[0]]])
478
  results = self.detection_processor.post_process_object_detection(
479
  outputs, threshold=0.4, target_sizes=target_sizes
 
504
  # Transform image for feature extraction
505
  image_tensor = self.transform(image)
506
 
507
+ # Use inference mode for better performance
508
+ with torch.inference_mode():
509
  embedding = self.feature_encoder(image_tensor.unsqueeze(0).to(self.device))
510
 
511
  return {