Spaces:

marcuscanhaco
/

weapon-detection-app

Runtime error

App Files Files Community

Marcus Vinicius Zerbini Canhaço committed on Feb 12

Commit

b181644

1 Parent(s): 739fe61

feat: atualização do detector com otimizações para GPU T4

Browse files

Files changed (3) hide show

.env.huggingface +7 -16
src/domain/detectors/gpu.py +55 -363
src/domain/factories/detector_factory.py +1 -1

.env.huggingface CHANGED Viewed

@@ -1,23 +1,13 @@
 # Configurações do Modelo
-HUGGING_FACE_TOKEN=""  # Configure no Hugging Face Space
-TOKENIZERS_PARALLELISM=false
 MODEL_CACHE_DIR=./.model_cache
 BATCH_SIZE=16
-MAX_WORKERS=2
 USE_HALF_PRECISION=true
 DETECTION_CONFIDENCE_THRESHOLD=0.5
-MODEL_CONFIDENCE_THRESHOLD=0.5
-MODEL_IOU_THRESHOLD=0.45
 # Configurações de Cache
 CACHE_DIR=/code/.cache/weapon_detection_cache
 RESULT_CACHE_SIZE=1000
-# Configurações de E-mail
-NOTIFICATION_EMAIL=""  # Configure no Hugging Face Space
-SENDGRID_API_KEY=xxx
-[email protected]
 # Configurações do Servidor
 SERVER_HOST=0.0.0.0
 SERVER_PORT=7860
@@ -28,17 +18,18 @@ DEFAULT_FPS=2
 DEFAULT_RESOLUTION=640
 # Configurações de GPU
 CUDA_VISIBLE_DEVICES=0
-TORCH_CUDA_ARCH_LIST="7.5"
 NVIDIA_VISIBLE_DEVICES=all
-NVIDIA_DRIVER_CAPABILITIES=compute,utility
-# Configurações do Hugging Face
-HF_TOKEN=hf_xxx  # Substitua com seu token real
-# Configurações do Telegram
 TELEGRAM_BOT_TOKEN=xxx
 TELEGRAM_CHAT_ID=xxx
-# Configurações do Discord
 DISCORD_WEBHOOK_URL=xxx

 # Configurações do Modelo
 MODEL_CACHE_DIR=./.model_cache
 BATCH_SIZE=16
 USE_HALF_PRECISION=true
 DETECTION_CONFIDENCE_THRESHOLD=0.5
 # Configurações de Cache
 CACHE_DIR=/code/.cache/weapon_detection_cache
 RESULT_CACHE_SIZE=1000
 # Configurações do Servidor
 SERVER_HOST=0.0.0.0
 SERVER_PORT=7860
 DEFAULT_RESOLUTION=640
 # Configurações de GPU
+CUDA_DEVICE=0
 CUDA_VISIBLE_DEVICES=0
 NVIDIA_VISIBLE_DEVICES=all
+# Configurações de E-mail
+NOTIFICATION_EMAIL=""  # Configure no Hugging Face Space
+SENDGRID_API_KEY=xxx
+[email protected]
+# Configurações de Telegram
 TELEGRAM_BOT_TOKEN=xxx
 TELEGRAM_CHAT_ID=xxx
+# Configurações de Discord
 DISCORD_WEBHOOK_URL=xxx

src/domain/detectors/gpu.py CHANGED Viewed

@@ -1,134 +1,69 @@
 import torch
 import torch.nn.functional as F
-import torch._dynamo
 import logging
 import os
-import time
 import gc
 import numpy as np
 import cv2
 from PIL import Image
 from transformers import Owlv2Processor, Owlv2ForObjectDetection
-from .base import BaseDetector, BaseCache
-import tempfile
 logger = logging.getLogger(__name__)
-# Configurações globais do PyTorch para otimização em GPU
-torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.allow_tf32 = True
-torch.backends.cudnn.benchmark = True
-torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True
-torch._dynamo.config.suppress_errors = True
-# Configurações para Zero-GPU
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
-class GPUCache(BaseCache):
-    """Cache otimizado para GPU."""
-    def __init__(self, max_size: int = 100):  # Reduzido para economizar memória
-        super().__init__(max_size)
-        self.device = torch.device('cuda')
 class WeaponDetectorGPU(BaseDetector):
-    """Implementação GPU do detector de armas com otimizações para a última versão do OWLv2."""
     def __init__(self):
-        """Inicializa variáveis básicas."""
         super().__init__()
-        self.default_resolution = 512  # Reduzido para economizar memória
-        self.amp_dtype = torch.float16
-        self.preprocess_stream = torch.cuda.Stream()
-        self.max_batch_size = 4  # Reduzido para Zero-GPU
-        self.current_batch_size = 2  # Reduzido para Zero-GPU
-        self.min_batch_size = 1
     def _initialize(self):
-        """Inicializa o modelo e o processador para execução exclusiva em GPU."""
         try:
             # Configurar device
-            self.device = self._get_best_device()
-            # Diretório de cache para o modelo
-            cache_dir = os.path.join(tempfile.gettempdir(), 'weapon_detection_cache')
-            os.makedirs(cache_dir, exist_ok=True)
-            # Limpar memória GPU
-            self._clear_gpu_memory()
             logger.info("Carregando modelo e processador...")
-            # Carregar processador e modelo com otimizações
             model_name = "google/owlv2-base-patch16"
-            self.owlv2_processor = Owlv2Processor.from_pretrained(
-                model_name,
-                cache_dir=cache_dir
-            )
-            # Configurações otimizadas para Zero-GPU
             self.owlv2_model = Owlv2ForObjectDetection.from_pretrained(
                 model_name,
-                cache_dir=cache_dir,
-                torch_dtype=self.amp_dtype,
-                device_map="auto",
-                low_cpu_mem_usage=True,
-                max_memory={'cuda:0': '10GB'}  # Limitar uso de memória
             ).to(self.device)
-            # Otimizar modelo para inferência
             self.owlv2_model.eval()
-            # Usar queries do método base
             self.text_queries = self._get_detection_queries()
-            logger.info(f"Total de queries carregadas: {len(self.text_queries)}")
-            # Processar queries uma única vez com otimização de memória
-            with torch.cuda.amp.autocast(dtype=self.amp_dtype):
-                self.processed_text = self.owlv2_processor(
-                    text=self.text_queries,
-                    return_tensors="pt",
-                    padding=True
-                )
-                self.processed_text = {
-                    key: val.to(self.device, non_blocking=True)
-                    for key, val in self.processed_text.items()
-                }
-            # Ajustar batch size baseado na memória disponível
-            self._adjust_batch_size()
-            logger.info(f"Inicialização GPU completa! Batch size inicial: {self.current_batch_size}")
             self._initialized = True
         except Exception as e:
             logger.error(f"Erro na inicialização GPU: {str(e)}")
             raise
-    def _adjust_batch_size(self):
-        """Ajusta o batch size baseado na memória disponível."""
-        try:
-            gpu_mem = torch.cuda.get_device_properties(0).total_memory
-            free_mem = torch.cuda.memory_reserved() - torch.cuda.memory_allocated()
-            mem_ratio = free_mem / gpu_mem
-            if mem_ratio < 0.2:  # Menos de 20% livre
-                self.current_batch_size = max(self.min_batch_size, self.current_batch_size // 2)
-            elif mem_ratio > 0.4:  # Mais de 40% livre
-                self.current_batch_size = min(self.max_batch_size, self.current_batch_size * 2)
-            logger.debug(f"Batch size ajustado para {self.current_batch_size} (Memória livre: {mem_ratio:.1%})")
-        except Exception as e:
-            logger.warning(f"Erro ao ajustar batch size: {str(e)}")
-            self.current_batch_size = self.min_batch_size
     def detect_objects(self, image: Image.Image, threshold: float = 0.3) -> list:
-        """Detecta objetos em uma imagem utilizando a última versão do OWLv2."""
         try:
-            self.threshold = threshold
             # Pré-processar imagem
             if image.mode != 'RGB':
                 image = image.convert('RGB')
@@ -138,7 +73,6 @@ class WeaponDetectorGPU(BaseDetector):
                 images=image,
                 return_tensors="pt"
             )
             image_inputs = {
                 key: val.to(self.device)
                 for key, val in image_inputs.items()
@@ -177,283 +111,41 @@ class WeaponDetectorGPU(BaseDetector):
             logger.error(f"Erro em detect_objects: {str(e)}")
             return []
     def process_video(self, video_path: str, fps: int = None, threshold: float = 0.3, resolution: int = 640) -> tuple:
-        """Processa um vídeo utilizando GPU com processamento em lote e otimizações para T4."""
         try:
-            metrics = {
-                "total_time": 0,
-                "frame_extraction_time": 0,
-                "analysis_time": 0,
-                "frames_analyzed": 0,
-                "video_duration": 0,
-                "device_type": self.device.type,
-                "detections": [],
-                "technical": {
-                    "model": "owlv2-base-patch16",
-                    "input_size": f"{resolution}x{resolution}",
-                    "threshold": threshold,
-                    "batch_size": self.current_batch_size,
-                    "gpu_memory": f"{torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f}GB"
-                }
-            }
-            start_time = time.time()
-            frames = self.extract_frames(video_path, fps, resolution)
-            metrics["frame_extraction_time"] = time.time() - start_time
             metrics["frames_analyzed"] = len(frames)
-            if not frames:
-                logger.warning("Nenhum frame extraído do vídeo")
-                return video_path, metrics
-            metrics["video_duration"] = len(frames) / (fps or 2)
-            analysis_start = time.time()
-            # Processar frames em lotes com ajuste dinâmico de batch size
-            for i in range(0, len(frames), self.current_batch_size):
-                try:
-                    batch_frames = frames[i:i + self.current_batch_size]
-                    # Pré-processamento assíncrono
-                    with torch.cuda.stream(self.preprocess_stream):
-                        batch_images = [
-                            Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-                            for frame in batch_frames
-                        ]
-                        batch_inputs = self.owlv2_processor(
-                            images=batch_images,
-                            return_tensors="pt"
-                        )
-                        batch_inputs = {
-                            key: val.to(self.device, non_blocking=True)
-                            for key, val in batch_inputs.items()
-                        }
-                    # Expandir texto processado para o batch
-                    batch_text = {
-                        key: val.repeat(len(batch_images), 1)
-                        for key, val in self.processed_text.items()
-                    }
-                    inputs = {**batch_inputs, **batch_text}
-                    # Inferência com mixed precision
-                    with torch.cuda.amp.autocast(dtype=self.amp_dtype):
-                        with torch.no_grad():
-                            outputs = self.owlv2_model(**inputs)
-                    # Processar resultados
-                    target_sizes = torch.tensor([[img.size[::-1] for img in batch_images]], device=self.device)
-                    results = self.owlv2_processor.post_process_grounded_object_detection(
-                        outputs=outputs,
-                        target_sizes=target_sizes[0],
-                        threshold=threshold
-                    )
-                    # Verificar detecções
-                    for batch_idx, result in enumerate(results):
-                        if len(result["scores"]) > 0:
-                            frame_idx = i + batch_idx
-                            max_score_idx = torch.argmax(result["scores"])
-                            score = result["scores"][max_score_idx]
-                            if score.item() >= threshold:
-                                detection = {
-                                    "frame": frame_idx,
-                                    "confidence": score.item(),
-                                    "box": [int(x) for x in result["boxes"][max_score_idx].tolist()],
-                                    "label": self.text_queries[result["labels"][max_score_idx]]
-                                }
-                                metrics["detections"].append(detection)
-                                metrics["analysis_time"] = time.time() - analysis_start
-                                metrics["total_time"] = time.time() - start_time
-                                return video_path, metrics
-                    # Limpar memória e ajustar batch size periodicamente
-                    if (i // self.current_batch_size) % 5 == 0:
-                        self._clear_gpu_memory()
-                        self._adjust_batch_size()
-                except RuntimeError as e:
-                    if "out of memory" in str(e):
-                        logger.warning("OOM detectado, reduzindo batch size")
-                        self._clear_gpu_memory()
-                        self.current_batch_size = max(self.min_batch_size, self.current_batch_size // 2)
-                        continue
-                    raise
-            metrics["analysis_time"] = time.time() - analysis_start
-            metrics["total_time"] = time.time() - start_time
             return video_path, metrics
         except Exception as e:
             logger.error(f"Erro ao processar vídeo: {str(e)}")
-            return video_path, metrics
-    def _clear_gpu_memory(self):
-        """Limpa memória GPU de forma agressiva."""
-        try:
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-            gc.collect()
-        except Exception as e:
-            logger.error(f"Erro ao limpar memória GPU: {str(e)}")
-    def _get_best_device(self):
-        if not torch.cuda.is_available():
-            raise RuntimeError("CUDA não está disponível!")
-        return torch.device('cuda')
-    def _preprocess_image(self, image: Image.Image) -> Image.Image:
-        """Pré-processa a imagem com otimizações para GPU."""
-        try:
-            target_size = (self.default_resolution, self.default_resolution)
-            if image.mode != 'RGB':
-                image = image.convert('RGB')
-            if image.size != target_size:
-                ratio = min(target_size[0] / image.size[0], target_size[1] / image.size[1])
-                new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
-                with torch.cuda.stream(self.preprocess_stream), torch.amp.autocast(device_type='cuda', dtype=self.amp_dtype):
-                    img_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).unsqueeze(0)
-                    img_tensor = img_tensor.to(self.device, dtype=self.amp_dtype, non_blocking=True)
-                    img_tensor = img_tensor / 255.0
-                    mode = 'bilinear' if ratio < 1 else 'nearest'
-                    img_tensor = F.interpolate(
-                        img_tensor,
-                        size=new_size,
-                        mode=mode,
-                        align_corners=False if mode == 'bilinear' else None
-                    )
-                    if new_size != target_size:
-                        final_tensor = torch.zeros(
-                            (1, 3, target_size[1], target_size[0]),
-                            device=self.device,
-                            dtype=self.amp_dtype
-                        )
-                        pad_left = (target_size[0] - new_size[0]) // 2
-                        pad_top = (target_size[1] - new_size[1]) // 2
-                        final_tensor[
-                            :,
-                            :,
-                            pad_top:pad_top + new_size[1],
-                            pad_left:pad_left + new_size[0]
-                        ] = img_tensor
-                        img_tensor = final_tensor
-                    img_tensor = img_tensor.squeeze(0).permute(1, 2, 0).cpu()
-                    image = Image.fromarray((img_tensor.numpy() * 255).astype(np.uint8))
-            return image
-        except Exception as e:
-            logger.error(f"Erro no pré-processamento: {str(e)}")
-            return image
-    def _get_memory_usage(self):
-        """Retorna o uso atual de memória GPU em porcentagem."""
-        try:
-            allocated = torch.cuda.memory_allocated()
-            reserved = torch.cuda.memory_reserved()
-            total = torch.cuda.get_device_properties(0).total_memory
-            return (allocated + reserved) / total * 100
-        except Exception as e:
-            logger.error(f"Erro ao obter uso de memória GPU: {str(e)}")
-            return 0
-    def _apply_nms(self, detections: list, iou_threshold: float = 0.5) -> list:
-        """Aplica Non-Maximum Suppression nas detecções usando operações em GPU."""
-        try:
-            if not detections:
-                return []
-            # Converter detecções para tensores na GPU
-            boxes = torch.tensor([[d["box"][0], d["box"][1], d["box"][2], d["box"][3]] for d in detections], device=self.device)
-            scores = torch.tensor([d["confidence"] for d in detections], device=self.device)
-            labels = [d["label"] for d in detections]
-            # Calcular áreas dos boxes
-            area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
-            # Ordenar por score
-            _, order = scores.sort(descending=True)
-            keep = []
-            while order.numel() > 0:
-                if order.numel() == 1:
-                    keep.append(order.item())
-                    break
-                i = order[0]
-                keep.append(i.item())
-                # Calcular IoU com os boxes restantes
-                xx1 = torch.max(boxes[i, 0], boxes[order[1:], 0])
-                yy1 = torch.max(boxes[i, 1], boxes[order[1:], 1])
-                xx2 = torch.min(boxes[i, 2], boxes[order[1:], 2])
-                yy2 = torch.min(boxes[i, 3], boxes[order[1:], 3])
-                w = torch.clamp(xx2 - xx1, min=0)
-                h = torch.clamp(yy2 - yy1, min=0)
-                inter = w * h
-                # Calcular IoU
-                ovr = inter / (area[i] + area[order[1:]] - inter)
-                # Encontrar boxes com IoU menor que o threshold
-                ids = (ovr <= iou_threshold).nonzero().squeeze()
-                if ids.numel() == 0:
-                    break
-                order = order[ids + 1]
-            # Construir lista de detecções filtradas
-            filtered_detections = []
-            for idx in keep:
-                filtered_detections.append({
-                    "confidence": scores[idx].item(),
-                    "box": boxes[idx].tolist(),
-                    "label": labels[idx]
-                })
-            return filtered_detections
-        except Exception as e:
-            logger.error(f"Erro ao aplicar NMS na GPU: {str(e)}")
-            return []
-    def _should_clear_cache(self):
-        """Determina se o cache deve ser limpo baseado no uso de memória."""
-        try:
-            memory_usage = self._get_memory_usage()
-            if memory_usage > 90:
-                return True
-            if memory_usage > 75 and not hasattr(self, '_last_cache_clear'):
-                return True
-            if hasattr(self, '_last_cache_clear'):
-                time_since_last_clear = time.time() - self._last_cache_clear
-                if memory_usage > 80 and time_since_last_clear > 300:
-                    return True
-            return False
-        except Exception as e:
-            logger.error(f"Erro ao verificar necessidade de limpeza: {str(e)}")
-            return False
-    def clear_cache(self):
-        """Limpa o cache de resultados e libera memória quando necessário."""
-        try:
-            if self._should_clear_cache():
-                if hasattr(self, 'result_cache'):
-                    self.result_cache.clear()
-                torch.cuda.empty_cache()
-                gc.collect()
-                self._last_cache_clear = time.time()
-                logger.info(f"Cache GPU limpo com sucesso. Uso de memória: {self._get_memory_usage():.1f}%")
-            else:
-                logger.debug("Limpeza de cache não necessária no momento")
-        except Exception as e:
-            logger.error(f"Erro ao limpar cache GPU: {str(e)}")

 import torch
 import torch.nn.functional as F
 import logging
 import os
 import gc
 import numpy as np
 import cv2
 from PIL import Image
 from transformers import Owlv2Processor, Owlv2ForObjectDetection
+from .base import BaseDetector
 logger = logging.getLogger(__name__)
 class WeaponDetectorGPU(BaseDetector):
+    """Detector de armas otimizado para GPU."""
     def __init__(self):
         """Set up the detector and load the model immediately.

         After the base-class constructor runs, this stores the default
         frame resolution (640), selects the inference device via
         ``_get_best_device()`` and calls ``_initialize()``, so
         construction fails if ``_initialize()`` raises.
         """
         super().__init__()
         self.default_resolution = 640
         self.device = self._get_best_device()
         # Eager load: the instance is only returned once the model is ready.
         self._initialize()
     def _initialize(self):
         """Load the OWLv2 processor and model and pre-tokenize the text queries.

         The model is loaded in float16 and moved to ``self.device``. The
         detection queries returned by ``_get_detection_queries()`` are
         tokenized once and kept on the same device in ``self.processed_text``.
         On success ``self._initialized`` is set to ``True``.

         Raises:
             RuntimeError: if CUDA is not available.
             Exception: any other loading failure is logged and re-raised.
         """
         try:
             if not torch.cuda.is_available():
                 raise RuntimeError("CUDA não está disponível!")

             logger.info("Carregando modelo e processador...")
             model_name = "google/owlv2-base-patch16"
             self.owlv2_processor = Owlv2Processor.from_pretrained(model_name)

             # Place the model with a single explicit .to(). Combining
             # device_map="auto" with .to() needs `accelerate` and can warn or
             # fail when the dispatcher has split or offloaded the weights.
             self.owlv2_model = Owlv2ForObjectDetection.from_pretrained(
                 model_name,
                 torch_dtype=torch.float16,
             ).to(self.device)
             self.owlv2_model.eval()

             # Tokenize the queries once; only the device-resident tensors are
             # stored on the instance.
             self.text_queries = self._get_detection_queries()
             text_inputs = self.owlv2_processor(
                 text=self.text_queries,
                 return_tensors="pt",
                 padding=True,
             )
             self.processed_text = {
                 key: val.to(self.device)
                 for key, val in text_inputs.items()
             }

             logger.info("Inicialização GPU completa!")
             self._initialized = True
         except Exception as e:
             logger.error(f"Erro na inicialização GPU: {str(e)}")
             raise
     def detect_objects(self, image: Image.Image, threshold: float = 0.3) -> list:
+        """Detecta objetos em uma imagem."""
         try:
             # Pré-processar imagem
             if image.mode != 'RGB':
                 image = image.convert('RGB')
                 images=image,
                 return_tensors="pt"
             )
             image_inputs = {
                 key: val.to(self.device)
                 for key, val in image_inputs.items()
             logger.error(f"Erro em detect_objects: {str(e)}")
             return []
+    def _get_best_device(self):
+        """Retorna o melhor dispositivo disponível."""
+        return torch.device(0)  # Usar primeira GPU
+    def _clear_gpu_memory(self):
+        """Limpa memória GPU."""
+        torch.cuda.empty_cache()
+        gc.collect()
     def process_video(self, video_path: str, fps: int = None, threshold: float = 0.3, resolution: int = 640) -> tuple:
         """Scan a video for detections, stopping at the first frame that has any.

         Args:
             video_path: Path of the video; passed to ``extract_frames`` and
                 returned unchanged.
             fps: Sampling rate passed to ``extract_frames``; a falsy value
                 (``None`` or 0) falls back to 2.
             threshold: Score threshold forwarded to ``detect_objects``.
             resolution: Frame resolution forwarded to ``extract_frames``.

         Returns:
             tuple: ``(video_path, metrics)`` where ``metrics`` has
             ``"total_time"`` (elapsed seconds as a float),
             ``"frames_analyzed"`` (number of frames extracted) and
             ``"detections"`` (empty, or one ``{"frame", "detections"}`` entry
             for the first frame with a detection). Errors are logged and the
             metrics gathered so far are returned.
         """
         # Function-scope import: this module no longer imports `time` at the top.
         import time

         metrics = {
             "total_time": 0,
             "frames_analyzed": 0,
             "detections": []
         }
         started = time.perf_counter()
         try:
             frames = self.extract_frames(video_path, fps or 2, resolution)
             metrics["frames_analyzed"] = len(frames)
             for i, frame in enumerate(frames):
                 # The frame is converted from BGR to RGB before building the PIL image.
                 frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 frame_pil = Image.fromarray(frame_rgb)
                 detections = self.detect_objects(frame_pil, threshold)
                 if detections:
                     metrics["detections"].append({
                         "frame": i,
                         "detections": detections
                     })
                     # One positive frame is enough; skip the rest of the video.
                     break
         except Exception as e:
             logger.error(f"Erro ao processar vídeo: {str(e)}")
         finally:
             # Previously never updated, so callers always saw total_time == 0.
             metrics["total_time"] = time.perf_counter() - started
         return video_path, metrics

src/domain/factories/detector_factory.py CHANGED Viewed

@@ -72,7 +72,7 @@ def is_gpu_available():
                 return False
             # Verificar se podemos realmente usar a GPU
-            device = torch.device('cuda')
             dummy_tensor = torch.zeros(1, device=device)
             del dummy_tensor
             torch.cuda.empty_cache()

                 return False
             # Verificar se podemos realmente usar a GPU
+            device = torch.device(0)  # Usar índice do dispositivo
             dummy_tensor = torch.zeros(1, device=device)
             del dummy_tensor
             torch.cuda.empty_cache()