# Source: Hugging Face Spaces -- prodevroger / sam (status: Sleeping); file "sam", file size: 8,040 Bytes

import numpy as np
import cv2
import torch
from segment_anything import SamPredictor, sam_model_registry
from diffusers import StableDiffusionInpaintPipeline
import gradio as gr
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import tempfile
import os

# Process-wide model cache: all three slots start empty and are populated
# lazily by load_models(), so the heavy checkpoints are read at most once.
sam_model = sd_pipe = predictor = None

def load_models():
    """Load (once) and return the SAM predictor and the inpainting pipeline.

    The loaded objects are stored in the module-level ``sam_model``,
    ``predictor`` and ``sd_pipe`` globals, so only the first call pays the
    loading cost; later calls return the cached instances.

    Returns:
        tuple: ``(predictor, sd_pipe)`` -- a ``SamPredictor`` wrapping the
        ``vit_b`` SAM model and a ``StableDiffusionInpaintPipeline``.
    """
    global sam_model, sd_pipe, predictor
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Key the cache on ``predictor`` rather than ``sam_model``: if a previous
    # call built the model but failed before wrapping it, the predictor must
    # still be created instead of returning ``None`` forever.
    if predictor is None:
        if sam_model is None:
            print("Loading SAM model...")
            # The registry builder reads the checkpoint and loads the weights
            # itself, so a second torch.load()/load_state_dict() pass would
            # only read the file from disk again.
            sam_model = sam_model_registry["vit_b"](checkpoint="sam_vit_b_01ec64.pth")
            # Run SAM on the same device as the diffusion pipeline.
            sam_model.to(device=device)
        predictor = SamPredictor(sam_model)

    if sd_pipe is None:
        print("Loading Stable Diffusion model...")
        sd_pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-inpainting",
            # Half precision is only used on GPU; CPU runs in float32.
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            safety_checker=None
        ).to(device)

    return predictor, sd_pipe

def process_house_image(image, scale):
    """Segment the roof in a house image and estimate its area.

    Args:
        image: Image as an array-like (or PIL image) laid out as grayscale
            ``(H, W)``, RGB ``(H, W, 3)`` or RGBA ``(H, W, 4)``.
        scale: Image scale in pixels per meter; must be positive.

    Returns:
        tuple: ``(overlay, final_mask, roof_area)`` where ``overlay`` is a
        uint8 RGB array with the detected roof tinted red, ``final_mask`` is
        a boolean ``(H, W)`` array and ``roof_area`` is the mask's pixel
        count divided by ``scale ** 2`` (square meters), as a float.

    Raises:
        ValueError: If ``scale`` is not positive or the image does not have
            one of the supported layouts.
    """
    # Fail fast, before the expensive segmentation, instead of producing an
    # infinite area from a division by zero further down.
    if scale is None or scale <= 0:
        raise ValueError("scale must be a positive number of pixels per meter")

    predictor, _ = load_models()

    # Normalize to an (H, W, 3) array.
    image = np.array(image)
    if image.ndim == 2:  # Grayscale: replicate the single channel
        image = np.stack([image] * 3, axis=-1)
    elif image.ndim == 3 and image.shape[-1] == 4:  # Remove alpha channel
        image = image[..., :3]
    if image.ndim != 3 or image.shape[-1] != 3:
        raise ValueError(f"Unsupported image shape: {image.shape}")

    # Process with SAM.
    # NOTE(review): predict() is called without point/box prompts -- confirm
    # the prompt-free masks are good enough for roof detection.
    predictor.set_image(image)
    masks, scores, _ = predictor.predict()

    # Filter for roof segments (position and size heuristic)
    img_height = image.shape[0]
    roof_masks = []
    for mask, score in zip(masks, scores):
        rows = np.nonzero(mask)[0]
        if rows.size == 0:
            continue

        centroid_y = rows.mean()
        height_ratio = (rows.max() - rows.min()) / img_height

        # Keep masks centered above 60% of the image height that span more
        # than 10% of it vertically.
        if centroid_y < img_height * 0.6 and height_ratio > 0.1:
            roof_masks.append((mask, score))

    # Combine the three best-scoring candidates into one mask
    roof_masks.sort(key=lambda item: item[1], reverse=True)
    final_mask = np.zeros(image.shape[:2], dtype=bool)
    for mask, _ in roof_masks[:3]:
        final_mask = np.logical_or(final_mask, mask)

    # Create overlay visualization: blend 40% red into the roof pixels only,
    # leaving the rest of the picture untouched (blending a black layer over
    # the whole frame would darken everything outside the roof).
    overlay = image.astype(np.uint8)  # astype() returns a copy
    roof_tint = np.array([255.0, 0.0, 0.0])
    overlay[final_mask] = (0.6 * image[final_mask] + 0.4 * roof_tint).astype(np.uint8)

    # Calculate area: pixels -> square meters
    roof_pixels = int(np.count_nonzero(final_mask))
    roof_area = roof_pixels / (scale ** 2)

    return overlay, final_mask, roof_area

def calculate_sheets(roof_area, sheet_width, sheet_height, waste_factor=0.15):
    """Return the number of whole sheets needed to cover a roof.

    Args:
        roof_area: Roof area in square meters; must not be negative.
        sheet_width: Width of one sheet in meters; must be positive.
        sheet_height: Height of one sheet in meters; must be positive.
        waste_factor: Fraction of extra material to add for wastage
            (0.15 adds 15%).

    Returns:
        int: Sheet count, rounded up to the next whole sheet.

    Raises:
        ValueError: If a sheet dimension is missing or not positive, or if
            ``roof_area`` is negative.
    """
    # Validate up front: a zero or empty dimension would otherwise surface as
    # a division-by-zero / overflow / TypeError with no hint about the cause.
    if sheet_width is None or sheet_height is None:
        raise ValueError("sheet_width and sheet_height are required")
    if sheet_width <= 0 or sheet_height <= 0:
        raise ValueError("sheet_width and sheet_height must be positive")
    if roof_area < 0:
        raise ValueError("roof_area must not be negative")

    sheet_area = sheet_width * sheet_height
    sheets = (roof_area / sheet_area) * (1 + waste_factor)
    return int(np.ceil(sheets))

def generate_new_roof(image, roof_mask, pattern_prompt, sheet_width, sheet_height):
    """Inpaint the masked roof region with a new pattern.

    Args:
        image: Source image as an array accepted by ``Image.fromarray``
            (grayscale, RGB or RGBA).
        roof_mask: 2-D mask array; truthy pixels are the region to repaint.
        pattern_prompt: Text description of the desired roof pattern.
        sheet_width: Sheet width in meters (added to the prompt).
        sheet_height: Sheet height in meters (added to the prompt).

    Returns:
        PIL.Image.Image: The inpainted image, at the input image's size.
    """
    _, sd_pipe = load_models()

    # Convert to PIL format; force 3-channel RGB so RGBA or grayscale input
    # does not reach the pipeline with an unexpected channel count.
    image_pil = Image.fromarray(np.asarray(image)).convert("RGB")

    # Convert mask to PIL format (white = repaint, black = keep)
    mask_pil = Image.fromarray(np.asarray(roof_mask).astype(np.uint8) * 255)

    # Enhance prompt with sheet dimensions
    enhanced_prompt = f"{pattern_prompt}, {sheet_width:.2f}m x {sheet_height:.2f}m sheets, architectural visualization, photorealistic"

    # Generate new roof
    result = sd_pipe(
        prompt=enhanced_prompt,
        image=image_pil,
        mask_image=mask_pil,
        num_inference_steps=30,
        guidance_scale=7.5
    ).images[0]

    # No height/width is passed, so the pipeline renders at its own default
    # resolution; scale back so the output matches the input's dimensions
    # and aspect ratio.
    if result.size != image_pil.size:
        result = result.resize(image_pil.size)

    return result

def full_process(image, scale, sheet_width, sheet_height, pattern_prompt):
    """Run segmentation, material estimation and roof redesign end to end.

    Args:
        image: Path to an image file, a PIL image, or an image array.
        scale: Image scale in pixels per meter.
        sheet_width: Sheet width in meters.
        sheet_height: Sheet height in meters.
        pattern_prompt: Text description of the desired roof pattern.

    Returns:
        tuple: ``(png_path, sheets_needed)`` -- the path of a PNG summary
        figure (segmentation, new design, info panel) and the sheet count.
        The PNG is written to a temporary file that is not deleted here,
        because the caller still has to read it.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload a house image first.")

    # Normalize every accepted input to an (H, W, 3) RGB array so that both
    # the segmentation and the inpainting step see the same layout.
    if isinstance(image, str):
        # Context manager closes the file handle once pixels are copied out;
        # convert() also expands palette / grayscale / RGBA files.
        with Image.open(image) as image_file:
            image = np.array(image_file.convert("RGB"))
    elif isinstance(image, Image.Image):
        image = np.array(image.convert("RGB"))
    else:
        image = np.array(image)
        if image.ndim == 2:  # Grayscale: replicate the single channel
            image = np.stack([image] * 3, axis=-1)
        elif image.ndim == 3 and image.shape[-1] == 4:  # Drop alpha
            image = image[..., :3]

    # Process image to get roof mask
    overlay, roof_mask, roof_area = process_house_image(image, scale)

    # Calculate sheets needed
    sheets_needed = calculate_sheets(roof_area, sheet_width, sheet_height)

    # Generate new roof visualization
    new_roof_image = generate_new_roof(image, roof_mask, pattern_prompt, sheet_width, sheet_height)

    # Create result visualization
    fig, ax = plt.subplots(1, 3, figsize=(18, 6))
    try:
        # Original with overlay
        ax[0].imshow(overlay)
        ax[0].set_title("Roof Segmentation")
        ax[0].axis('off')

        # New roof
        ax[1].imshow(new_roof_image)
        ax[1].set_title("New Roof Design")
        ax[1].axis('off')

        # Info panel
        info_text = f"Roof Area: {roof_area:.2f} m²\n\n" \
                    f"Sheet Size: {sheet_width} × {sheet_height} m\n\n" \
                    f"Sheets Needed: {sheets_needed}\n\n" \
                    f"Pattern: {pattern_prompt}"

        ax[2].text(0.1, 0.5, info_text, fontsize=12,
                   bbox=dict(facecolor='white', alpha=0.8))
        ax[2].axis('off')
        fig.tight_layout()

        # Reserve a temp file name, closing our own handle before matplotlib
        # writes to the path (an open handle would otherwise be leaked).
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
            output_path = temp_file.name
        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Close this specific figure even on failure so figures do not pile
        # up in pyplot's registry across requests.
        plt.close(fig)

    return output_path, sheets_needed

# Gradio UI: inputs on the left, results on the right, help text underneath.
with gr.Blocks(title="Roof Renovation System", theme=gr.themes.Soft()) as demo:
    # Page header
    gr.Markdown("# 🏠 Roof Segmentation & Renovation System")
    gr.Markdown(
        "Upload a house image, specify dimensions, and visualize new roof designs with material calculations"
    )

    with gr.Row():
        # Left column: everything the user supplies
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload House Image")
            scale_input = gr.Slider(
                minimum=1,
                maximum=500,
                value=100,
                label="Scale (pixels per meter)",
                info="Adjust based on image perspective",
            )

            pattern_prompt = gr.Textbox(
                label="Roof Pattern Description",
                value="modern red tile pattern",
                placeholder="Describe the new roof pattern",
            )

            # Sheet dimensions sit side by side
            with gr.Row():
                sheet_width = gr.Number(value=0.5, label="Sheet Width (meters)")
                sheet_height = gr.Number(value=2.0, label="Sheet Height (meters)")

            submit_btn = gr.Button(value="Generate Roof Design", variant="primary")

        # Right column: read-only outputs
        with gr.Column():
            output_image = gr.Image(label="Results", interactive=False)
            sheets_output = gr.Number(label="Sheets Needed", interactive=False)

    # Wire the button to the processing pipeline; the input order matches
    # the parameter order of full_process.
    submit_btn.click(
        full_process,
        [image_input, scale_input, sheet_width, sheet_height, pattern_prompt],
        [output_image, sheets_output],
    )

    # Markdown hard line breaks are two trailing spaces plus a newline.
    gr.Markdown("### How It Works:")
    gr.Markdown("  \n".join([
        "1. Upload a house image (aerial or perspective view)",
        "2. Adjust the scale (pixels per meter) based on image perspective",
        "3. Enter new roof sheet dimensions",
        "4. Describe your desired roof pattern",
        "5. Click generate to see the new design and material requirements",
    ]))

    gr.Markdown("### Technical Notes:")
    gr.Markdown("  \n".join([
        "- Uses Meta's Segment Anything Model (SAM) for roof detection",
        "- Utilizes Stable Diffusion for realistic roof pattern generation",
        "- Material calculations include 15% wastage factor",
        "- Processing may take 20-40 seconds depending on image size",
    ]))

# Script entry point (used for Hugging Face Spaces deployment): only runs
# when the file is executed directly, not when it is imported.
if __name__ == "__main__":
    # Make sure an "examples" directory exists next to the app; exist_ok
    # keeps this a no-op when the directory is already there.
    os.makedirs("examples", exist_ok=True)
    
    # Start the Gradio server for the `demo` Blocks app defined above.
    demo.launch()