from PIL import Image
import json
import gradio as gr
from transformers import CLIPProcessor, CLIPModel, pipeline

# "model" and "tokenizer" are local directories bundled with this Space holding the
# CLIP weights and processor config; the VQA pipeline loads the library's default
# visual-question-answering checkpoint since no model is specified.
model = CLIPModel.from_pretrained("model")
processor = CLIPProcessor.from_pretrained("tokenizer")
vqa_pipeline = pipeline("visual-question-answering")
space_type_labels = [
    "living room", "bedroom", "kitchen", "terrace", "closet", "bathroom",
    "dining room", "office", "garage", "garden", "balcony", "attic",
    "hallway", "laundry room", "gym", "playroom", "storage room", "studio",
    "is_exterior", "empty_interior_room", "swimming pool"
]
equipment_questions = [
    "Does the image show outdoor furniture?",
    "Does the image show a parasol?",
    "Does the image show a pergola?",
    "Does the image show a grill?",
    "Does the image show a heater?",
    "Does the image show outdoor lighting?",
    "Does the image show planters?",
    "Does the image show water features?",
    "Does the image show floor coverings?",
    "Does the image show decorative items?",
    "Does the image show entertainment equipment?",
    "Does the image show protective materials?"
]
weights = {
    "Does the image show outdoor furniture?": 0.15,
    "Does the image show a parasol?": 0.05,
    "Does the image show a pergola?": 0.1,
    "Does the image show a grill?": 0.15,
    "Does the image show a heater?": 0.1,
    "Does the image show outdoor lighting?": 0.1,
    "Does the image show planters?": 0.05,
    "Does the image show water features?": 0.1,
    "Does the image show floor coverings?": 0.05,
    "Does the image show decorative items?": 0.05,
    "Does the image show entertainment equipment?": 0.05,
    "Does the image show protective materials?": 0.05
}
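# Worked example: if the VQA model answers "yes" only for outdoor furniture (0.15)
# and a grill (0.15), the weighted equipment score is 0.15 + 0.15 = 0.30 out of a
# maximum of 1.0 (all twelve items present).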
luminosity_classes = [
    'A well-lit room with abundant natural light, showcasing windows or a balcony through which sunlight passes unobstructed.',
    'A room depicted in darkness, where there is minimal or no visible light source.',
    'A room illuminated by artificial light sources such as lamps or ceiling lights.'
]
luminosity_labels = ['natural_light', 'no_light', 'artificial_light']
view_questions = [
    "Is this a panoramic view?",
    "Is this a city view?",
    "Is this a view of greenery?",
    "Is this a mountain view?",
    "Is this a view of the sea?",
    "Is this an exterior view of a building?"
]
# One label per question above (the two lists are zipped together in views()).
view_labels = ['panoramic', 'city', 'greenery', 'mountain', 'sea', 'building view']
certainty_classes = [
    'Windows, balconies, or terraces with an unobstructed outward view',
    'exterior view of a building or appearance of a house or apartment',
    'Artificial or fake view of any city or sea',
    'View obstructed by objects such as buildings, trees, or other structures',
    'Hallway or interior view with no outdoor visibility'
]
#certainty_classes = ['Windows, balconies, or terraces with an unobstructed outward view','Exterior view appearance of a house or apartment','unreal picture or fake of any city or sea view','view unfree from any obstructive objects such as buildings, trees, or other structures, and ideally seen through windows, balconies, or terraces','hallway']
render_classes = [
    "This is a realistic photo of an interior.",
    "This is a computer-generated render of an interior.",
    "This is a realistic photo of an exterior.",
    "This is a computer-generated render of an exterior."
]
threshold = 0
def calculate_equipment_score(image_results, weights):
    # Sum the weight of every equipment item the VQA model answered "yes" for.
    score = sum(weights[question] for question, present in image_results.items() if present)
    return score
def calculate_luminosity_score(processed_image):
    # Zero-shot CLIP classification of the lighting conditions.
    inputs = processor(text=luminosity_classes, images=processed_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    probabilities_list = probs.squeeze().tolist()
    luminosity_score = {class_name: probability for class_name, probability in zip(luminosity_labels, probabilities_list)}
    return luminosity_score
def calculate_space_type(processed_image):
    # Zero-shot CLIP classification of the room / space type.
    inputs = processor(text=space_type_labels, images=processed_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    probabilities_list = probs.squeeze().tolist()
    space_type_score = {class_name: probability for class_name, probability in zip(space_type_labels, probabilities_list)}
    return space_type_score
def certainty(processed_image):
    # Zero-shot CLIP classification of how trustworthy the outward view is.
    inputs = processor(text=certainty_classes, images=processed_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    probabilities_list = probs.squeeze().tolist()
    certainty_scores = {class_name: probability for class_name, probability in zip(certainty_classes, probabilities_list)}
    return certainty_scores
def views(processed_image):
    # Zero-shot CLIP classification of the view type (panoramic, city, sea, ...).
    inputs = processor(text=view_questions, images=processed_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    probabilities_list = probs.squeeze().tolist()
    views_score = {class_name: probability for class_name, probability in zip(view_labels, probabilities_list)}
    return views_score
def calculate_is_render(processed_image):
    # Zero-shot CLIP check for computer-generated renders vs. real photographs.
    render_inputs = processor(text=render_classes, images=processed_image, return_tensors="pt", padding=True)
    render_outputs = model(**render_inputs)
    render_logits = render_outputs.logits_per_image
    render_probs = render_logits.softmax(dim=1)
    render_probabilities_list = render_probs.squeeze().tolist()
    render_score = {class_name: probability for class_name, probability in zip(render_classes, render_probabilities_list)}
    # Probability that the image is a render (interior or exterior): values near 1.0
    # suggest a computer-generated image, values near 0.0 a real photo.
    is_render_prob = render_score["This is a computer-generated render of an interior."] + render_score["This is a computer-generated render of an exterior."]
    return is_render_prob
def generate_answer(image):
    # Downscale the uploaded image before running the models.
    processed_image = image.resize((256, 256))
    image_data = {
        "image_context": None,
        "equipment_score": None,
        "luminosity_score": None,
        "view_type": {"views": None, "certainty_score": None}
    }
    space_type_score = calculate_space_type(processed_image)
    image_data["image_context"] = space_type_score
    # Most likely space type, used to decide whether the equipment check applies.
    top_space_type = max(space_type_score, key=space_type_score.get)
    image_results = {}
    if top_space_type == "terrace" and space_type_score[top_space_type] >= threshold:
        for question in equipment_questions:
            result = vqa_pipeline(processed_image, question, top_k=1)
            answer = result[0]['answer'].lower() == "yes"
            image_results[question] = answer
        equipment_score = calculate_equipment_score(image_results, weights)
        image_data["equipment_score"] = equipment_score
    luminosity_score = calculate_luminosity_score(processed_image)
    image_data["luminosity_score"] = luminosity_score['natural_light']
    view = views(processed_image)
    image_data["view_type"]["views"] = view
    certainty_score = certainty(processed_image)
    # Keep only the probability of the first class: an unobstructed outward view.
    certainty_score = list(certainty_score.values())[0]
    image_data["view_type"]["certainty_score"] = certainty_score
    is_render = calculate_is_render(processed_image)
    image_data["is_render"] = is_render
    return json.dumps(image_data, indent=4)
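# Illustrative shape of the JSON returned above (the numbers are hypothetical,
# not real model outputs):
# {
#     "image_context": {"terrace": 0.71, "balcony": 0.12, ...},
#     "equipment_score": 0.30,
#     "luminosity_score": 0.83,
#     "view_type": {"views": {"panoramic": 0.40, ...}, "certainty_score": 0.55},
#     "is_render": 0.06
# }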
image_input = gr.Image(type="pil", label="Upload Image")
iface = gr.Interface(
    fn=generate_answer,
    inputs=[image_input],
    outputs="text",
    title="Vision intelligence",
    description="Upload an image"
)
iface.launch()
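# Optional local smoke test (hypothetical example: assumes an image file named
# "sample.jpg" sits next to this script). Uncomment to run without the Gradio UI:
# test_image = Image.open("sample.jpg").convert("RGB")
# print(generate_answer(test_image))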