import torch
from transformers import AutoImageProcessor, AutoModelForObjectDetection
import gradio as gr

# Load the pre-trained DETR model and its image processor.
# (AutoImageProcessor replaces the deprecated AutoFeatureExtractor.)
model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50")
processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")


# Function to estimate materials from detected areas (example: walls and foundations).
# Note: the stock COCO checkpoint does not emit 'wall' or 'foundation' labels;
# this logic assumes a model fine-tuned on blueprint classes.
def calculate_materials(detected_objects, scale_cm_per_px=1.0):
    materials = {
        "cement": 0.0,
        "bricks": 0.0,
        "steel": 0.0,
    }

    for obj in detected_objects:
        # Bounding box in absolute pixel coordinates: [xmin, ymin, xmax, ymax].
        # (post_process_object_detection returns pixel boxes when target_sizes
        # is given, so they must NOT be rescaled by the image size again.)
        x1, y1, x2, y2 = obj["bbox"]

        # Convert pixel dimensions to real-world dimensions using the blueprint scale.
        width = (x2 - x1) * scale_cm_per_px   # real-world width in cm
        height = (y2 - y1) * scale_cm_per_px  # real-world height in cm
        area = width * height                 # area in cm²

        # Print area for debugging
        print(f"Detected {obj['name']} with area {area:.1f} cm²")

        # Proportionality factors (simplified; adjust based on real-world data).
        if obj["name"] == "wall":
            materials["cement"] += area * 0.1   # cement estimate (kg)
            materials["bricks"] += area * 10    # brick count estimate
            materials["steel"] += area * 0.05   # steel estimate (kg)
        elif obj["name"] == "foundation":
            materials["cement"] += area * 0.2
            materials["bricks"] += area * 15
            materials["steel"] += area * 0.1

    return materials


# Define the function for image inference
def predict_image(image):
    # Convert the image to the tensor format the model expects.
    inputs = processor(images=image, return_tensors="pt")

    # Run inference with DETR (no gradients needed at inference time).
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process logits into absolute-pixel boxes; target_sizes is (height, width).
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.5
    )[0]

    detected_objects = []

    # Collect a class name and bounding box for each detection.
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        detected_objects.append({
            "name": model.config.id2label[label.item()],  # map class id to class name
            "bbox": box.tolist(),  # [xmin, ymin, xmax, ymax] in pixels
        })

    # Estimate materials from the detections; the scale is blueprint-specific
    # and must be measured for your drawings (1.0 cm/px is a placeholder).
    materials = calculate_materials(detected_objects, scale_cm_per_px=1.0)

    # Return the materials as a dictionary
    return materials


# Set up the Gradio interface: PIL image in, JSON out.
# type="pil" is required because predict_image uses image.size.
interface = gr.Interface(fn=predict_image, inputs=gr.Image(type="pil"), outputs=gr.JSON())

# Launch the Gradio interface
if __name__ == "__main__":
    interface.launch()
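
# A minimal sanity check for calculate_materials (a sketch with hypothetical
# numbers: the 'wall' label assumes a fine-tuned model, and 0.5 cm/px is an
# assumed blueprint scale). Uncomment to run without the Gradio app:
#
#   mock = [{"name": "wall", "bbox": [0, 0, 200, 50]}]
#   print(calculate_materials(mock, scale_cm_per_px=0.5))
#   # -> {'cement': 250.0, 'bricks': 25000.0, 'steel': 125.0}
#
# A 200 x 50 px box at 0.5 cm/px is 100 cm x 25 cm = 2500 cm², so cement =
# 2500 * 0.1 = 250 kg, bricks = 2500 * 10 = 25000, steel = 2500 * 0.05 = 125 kg.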