Spaces:

Abdu07
/

DualSight-Demo

Running

App Files Community

Abdu07 committed on Mar 26

Commit

526455e

verified ·

1 Parent(s): 4fc801a

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -32

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import torchvision.models as models
 import torchvision.transforms as transforms
 from PIL import Image
 from huggingface_hub import hf_hub_download
 ########################################
 # 1. Define the Model Architecture
@@ -13,9 +14,7 @@ class MultiTaskModel(nn.Module):
     def __init__(self, backbone, feature_dim, num_obj_classes):
         super(MultiTaskModel, self).__init__()
         self.backbone = backbone
-        # Object recognition head
         self.obj_head = nn.Linear(feature_dim, num_obj_classes)
-        # Binary classification head (0: AI-generated, 1: Real)
         self.bin_head = nn.Linear(feature_dim, 2)
     def forward(self, x):
@@ -27,41 +26,33 @@ class MultiTaskModel(nn.Module):
 ########################################
 # 2. Reconstruct the Model and Load Weights
 ########################################
-# IMPORTANT: Use the same number of object classes as in training.
-num_obj_classes = 494  # Updated to match the state dict from training
 device = torch.device("cpu")
-# Instantiate the backbone: a ResNet-50 with its final layer removed.
 resnet = models.resnet50(pretrained=False)
-resnet.fc = nn.Identity()  # Remove final classification layer
 feature_dim = 2048
-# Build the model architecture.
 model = MultiTaskModel(resnet, feature_dim, num_obj_classes)
 model.to(device)
-# Download the state dict from HF Hub.
-repo_id = "Abdu07/multitask-model"  # Your repo name
-filename = "Yolloplusclassproject_weights.pth"  # The state dict file you uploaded
 weights_path = hf_hub_download(repo_id=repo_id, filename=filename)
-# Load the state dict and update the model.
 state_dict = torch.load(weights_path, map_location="cpu")
 model.load_state_dict(state_dict)
 model.eval()
 ########################################
-# 3. Define Label Mappings and Transforms
 ########################################
-# Update these with your actual label mappings.
-# They should reflect the 494 unique pseudo-labels produced during training.
-# For this example, we assume that the mapping is stored somewhere.
-# Here we provide a dummy mapping for illustration. Replace it with your real mapping.
-idx_to_obj_label = {i: f"label_{i}" for i in range(num_obj_classes)}
 bin_label_names = ["AI-Generated", "Real"]
-# Define the validation transforms (must match those used during training)
 val_transforms = transforms.Compose([
     transforms.Resize(256),
     transforms.CenterCrop(224),
@@ -74,13 +65,8 @@ val_transforms = transforms.Compose([
 # 4. Define the Inference Function
 ########################################
 def predict_image(img: Image.Image) -> str:
-    """
-    Takes an uploaded PIL image, processes it, and returns the model's prediction.
-    """
-    # Ensure the image is in RGB mode.
     img = img.convert("RGB")
-    # Apply validation transforms.
-    img_tensor = val_transforms(img).unsqueeze(0).to(device)  # Shape: [1, 3, 224, 224]
     with torch.no_grad():
         obj_logits, bin_logits = model(img_tensor)
     obj_pred = torch.argmax(obj_logits, dim=1).item()
@@ -97,11 +83,7 @@ demo = gr.Interface(
     inputs=gr.Image(type="pil"),
     outputs="text",
     title="Multi-Task Image Classifier",
-    description=(
-        "Upload an image to receive two predictions:\n"
-        "1) The primary object in the image,\n"
-        "2) Whether the image is AI-generated or Real."
-    )
 )
 if __name__ == "__main__":

 import torchvision.transforms as transforms
 from PIL import Image
 from huggingface_hub import hf_hub_download
+import json
 ########################################
 # 1. Define the Model Architecture
     def __init__(self, backbone, feature_dim, num_obj_classes):
         super(MultiTaskModel, self).__init__()
         self.backbone = backbone
         self.obj_head = nn.Linear(feature_dim, num_obj_classes)
         self.bin_head = nn.Linear(feature_dim, 2)
     def forward(self, x):
 ########################################
 # 2. Reconstruct the Model and Load Weights
 ########################################
+num_obj_classes = 494  # Make sure this matches your training
 device = torch.device("cpu")
 resnet = models.resnet50(pretrained=False)
+resnet.fc = nn.Identity()
 feature_dim = 2048
 model = MultiTaskModel(resnet, feature_dim, num_obj_classes)
 model.to(device)
+repo_id = "Abdu07/multitask-model"
+filename = "Yolloplusclassproject_weights.pth"
 weights_path = hf_hub_download(repo_id=repo_id, filename=filename)
 state_dict = torch.load(weights_path, map_location="cpu")
 model.load_state_dict(state_dict)
 model.eval()
 ########################################
+# 3. Load Label Mapping and Define Transforms
 ########################################
+# Load the saved mapping from JSON
+with open("obj_label_mapping.json", "r") as f:
+    obj_label_to_idx = json.load(f)
+# Create the inverse mapping
+idx_to_obj_label = {v: k for k, v in obj_label_to_idx.items()}
 bin_label_names = ["AI-Generated", "Real"]
 val_transforms = transforms.Compose([
     transforms.Resize(256),
     transforms.CenterCrop(224),
 # 4. Define the Inference Function
 ########################################
 def predict_image(img: Image.Image) -> str:
     img = img.convert("RGB")
+    img_tensor = val_transforms(img).unsqueeze(0).to(device)
     with torch.no_grad():
         obj_logits, bin_logits = model(img_tensor)
     obj_pred = torch.argmax(obj_logits, dim=1).item()
     inputs=gr.Image(type="pil"),
     outputs="text",
     title="Multi-Task Image Classifier",
+    description="Upload an image to receive two predictions:\n1) The primary object in the image,\n2) Whether the image is AI-generated or Real."
 )
 if __name__ == "__main__":