Upload 5 files

- models/.DS_Store +0 -0
- models/identification_model.py +40 -0
- models/segmentation_model.py +32 -0
- models/summarization_model.py +19 -0
- models/text_extraction_model.py +9 -0
models/.DS_Store
ADDED
Binary file (6.15 kB)
models/identification_model.py
ADDED
@@ -0,0 +1,40 @@
+from transformers import CLIPProcessor, CLIPModel
+from PIL import Image
+import torch
+
+class IdentificationModel:
+    def __init__(self):
+        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model.to(self.device)
+
+    def identify_objects(self, image_path, text_descriptions):
+        # Load image
+        image = Image.open(image_path)
+
+        # Prepare inputs
+        inputs = self.processor(text=text_descriptions, images=image, return_tensors="pt", padding=True)
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+        # Run inference
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+
+        # Get logits and compute probabilities
+        logits_per_image = outputs.logits_per_image  # image-text similarity scores
+        probs = logits_per_image.softmax(dim=1)  # convert logits to probabilities
+
+        # Find the detection with the maximum probability
+        max_prob, max_idx = torch.max(probs[0], dim=0)
+
+        # Prepare the result for the highest-probability detection
+        detection = []
+        detection.append({
+            'description': text_descriptions[max_idx],
+            'probability': float(max_prob)
+        })
+
+        return detection
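For reference, a minimal usage sketch for this file (not part of the commit; the image path and candidate descriptions below are illustrative):

from models.identification_model import IdentificationModel

identifier = IdentificationModel()
# CLIP scores the image against each candidate description and the best match is returned
detections = identifier.identify_objects("example.jpg", ["a photo of a dog", "a photo of a cat", "a photo of a car"])
print(detections)  # e.g. [{'description': 'a photo of a dog', 'probability': 0.97}]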
models/segmentation_model.py
ADDED
@@ -0,0 +1,32 @@
+from ultralytics import YOLO
+import numpy as np
+import torchvision.transforms as transforms
+
+class SegmentationModel:
+    def __init__(self):
+        self.model = YOLO('yolov8m-seg.pt')
+        self.transform = transforms.Compose([
+            transforms.Resize((640, 640)),  # Resize to YOLOv8 input size
+            transforms.Lambda(lambda x: x.mul(255).byte()),  # Scale to 0-255 and convert to uint8
+            transforms.Lambda(lambda x: x.permute(1, 2, 0).numpy())  # Change from CHW to HWC
+        ])
+
+    def segment_image(self, image_path):
+        results = self.model(image_path, conf=0.25)
+
+        # Collect the class name for each detected mask
+        class_name = []
+        if results[0].masks is not None:
+            for counter, detection in enumerate(results[0].masks.data):
+                cls_id = int(results[0].boxes[counter].cls.item())
+                class_name.append(self.model.names[cls_id])
+
+        # Extract masks, boxes, and labels
+        result = results[0]
+        masks = result.masks.data.cpu().numpy() if result.masks is not None else np.array([])
+        boxes = result.boxes.xyxy.cpu().numpy() if result.boxes is not None else np.array([])
+        labels = result.boxes.cls.cpu().numpy() if result.boxes is not None else np.array([])
+
+        return masks, boxes, labels, class_name
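Again a hypothetical call into this file (not part of the commit; the image path is illustrative):

from models.segmentation_model import SegmentationModel

segmenter = SegmentationModel()
masks, boxes, labels, class_names = segmenter.segment_image("example.jpg")
# masks: one binary mask per detection; boxes: xyxy pixel coordinates
# class_names: human-readable labels, e.g. ['person', 'bicycle']
print(len(class_names), masks.shape)

Note that the transforms.Compose pipeline defined in __init__ is never applied in segment_image; ultralytics handles resizing and preprocessing internally when given an image path.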
models/summarization_model.py
ADDED
@@ -0,0 +1,19 @@
+from transformers import BartForConditionalGeneration, BartTokenizer
+
+class SummarizationModel:
+    def __init__(self):
+        self.model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
+        self.tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
+
+    def summarize(self, text):
+        # Split the text into lines and remove empty lines
+        lines = [line.strip() for line in text.split('\n') if line.strip()]
+
+        # If there's at most one non-empty line, return the text as is
+        if len(lines) <= 1:
+            return text.strip()
+
+        # Otherwise, proceed with summarization
+        inputs = self.tokenizer([text], max_length=1024, return_tensors="pt", truncation=True)
+        summary_ids = self.model.generate(inputs["input_ids"], num_beams=4, max_length=100, early_stopping=True)
+        return self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
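A usage sketch (illustrative, not in the commit):

from models.summarization_model import SummarizationModel

summarizer = SummarizationModel()
print(summarizer.summarize("One short line."))  # returned unchanged by the single-line guard
article = "First paragraph of extracted text.\nSecond paragraph.\nThird paragraph."
print(summarizer.summarize(article))  # BART beam-search summary, at most 100 generated tokens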
models/text_extraction_model.py
ADDED
@@ -0,0 +1,9 @@
+import easyocr
+
+class TextExtractionModel:
+    def __init__(self):
+        self.reader = easyocr.Reader(['en'])
+
+    def extract_text(self, image_path):
+        result = self.reader.readtext(image_path)
+        return ' '.join([detection[1] for detection in result])
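And a sketch for the OCR wrapper (illustrative; EasyOCR downloads its detection and recognition weights on first use):

from models.text_extraction_model import TextExtractionModel

extractor = TextExtractionModel()
# readtext returns (bbox, text, confidence) tuples; extract_text keeps only the text fields
print(extractor.extract_text("street_sign.jpg"))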