# HuggingFace Space script: real-estate image analysis (CLIP zero-shot
# classification + BLIP visual question answering), served with Gradio.
from PIL import Image
import json
import gradio as gr
import requests
from transformers import CLIPProcessor, CLIPModel, pipeline, BlipProcessor, BlipForConditionalGeneration
# Load the CLIP model/processor and a visual-question-answering pipeline from
# local directories ("model", "tokenizer", "vqa").
# NOTE(review): the processor is loaded from a directory named "tokenizer" --
# confirm it actually contains a full CLIPProcessor (image processor +
# tokenizer), not just tokenizer files.
model = CLIPModel.from_pretrained("model")
processor = CLIPProcessor.from_pretrained("tokenizer")
vqa_pipeline = pipeline("visual-question-answering",model="vqa")
# --- Zero-shot / VQA prompt configuration -----------------------------------

# Candidate room/space categories scored by CLIP zero-shot classification.
space_type_labels = ["living room", "bedroom", "kitchen", "terrace", "closet", "bathroom",
                     "dining room", "office", "garage", "garden",
                     "balcony", "attic", "hallway", "gym", "playroom", "storage room",
                     "studio", "is_exterior", "swimming pool", "others"]

# Yes/no questions put to the BLIP VQA model when the space is a terrace.
equipment_questions = [
    "Does the image show outdoor furniture?",
    "Does the image show a parasol?",
    "Does the image show a pergola?",
    "Does the image show a grill?",
    "Does the image show a heater?",
    "Does the image show outdoor lighting?",
    "Does the image show planters?",
    "Does the image show water features?",
    "Does the image show floor coverings?",
    "Does the image show decorative items?",
    "Does the image show entertainment equipment?",
    "Does the image show protective materials?"
]

# Contribution of each affirmative answer to the equipment score (sums to 1.0).
weights = {
    "Does the image show outdoor furniture?": 0.15,
    "Does the image show a parasol?": 0.05,
    "Does the image show a pergola?": 0.1,
    "Does the image show a grill?": 0.15,
    "Does the image show a heater?": 0.1,
    "Does the image show outdoor lighting?": 0.1,
    "Does the image show planters?": 0.05,
    "Does the image show water features?": 0.1,
    "Does the image show floor coverings?": 0.05,
    "Does the image show decorative items?": 0.05,
    "Does the image show entertainment equipment?": 0.05,
    "Does the image show protective materials?": 0.05
}

# CLIP prompts describing lighting conditions, and the short labels reported
# for them.  The two lists are zipped positionally, so they MUST stay in the
# same order.
luminosity_classes = [
    "A room filled with natural daylight.",
    "A room lit by artificial lights.",
    "A dark room with no lights."
]
# BUG FIX: was ['natural_light', 'no_light', 'artificial_light'] -- the
# artificial-light and no-light labels were swapped relative to the prompt
# order above, mislabelling those two probabilities.  Downstream code only
# reads 'natural_light' (index 0), which is unaffected by this fix.
luminosity_labels = ['natural_light', 'artificial_light', 'no_light']

# CLIP prompts describing what is visible through a window, plus the short
# labels they are reported under (zipped positionally with view_labels).
view_questions = [
    "This is a city view, showing buildings, streets, and urban infrastructure.",
    "This is a view of greenery, focusing on trees, parks, gardens, and other vegetative elements.",
    "This is a mountain view, showing mountains, hills, and rocky landscapes.",
    "This is a view of the sea, focusing on oceans, beaches, and large bodies of water.",
    "There is no window to see outside"  # fixed typo: was "There is not window ..."
]
view_labels = ['city', 'greenery', 'mountain', 'sea', 'not_clear']

# Prompts used to estimate how trustworthy the view classification is; the
# first entry (unobstructed outward view) is the one consumed downstream.
certainty_classes = [
    'Windows, balconies, or terraces with an unobstructed outward view',
    'exterior view of a building or appearance of a house or apartment',
    'Artificial or fake view of any city or sea',
    'View obstructed by objects such as buildings, trees, or other structures',
    'Hallway or interior view with no outdoor visibility'
]

# Prompts separating real photographs from computer-generated renders.
render_classes = [
    "This is a realistic photo of an interior.",
    "This is a computer-generated render of an interior.",
    "This is a realistic photo of an exterior.",
    "This is a computer-generated render of an exterior."
]

# Minimum probability required to accept the top space-type prediction.
# 0 effectively disables the gate (softmax probabilities are always >= 0).
threshold = 0
def calculate_equipment_score(image_results, weights):
    """Sum the weights of every question answered affirmatively.

    image_results maps question text -> bool (VQA answered "yes");
    weights maps the same questions -> their score contribution.
    """
    total = 0
    for question, present in image_results.items():
        if present:
            total += weights[question]
    return total
def calculate_luminosity_score(processed_image):
    """CLIP zero-shot classification of the image over the luminosity prompts.

    Returns a dict mapping each entry of luminosity_labels to its softmax
    probability (prompt order in luminosity_classes defines the pairing).
    """
    encoded = processor(text=luminosity_classes, images=processed_image,
                        return_tensors="pt", padding=True)
    distribution = model(**encoded).logits_per_image.softmax(dim=1)
    return dict(zip(luminosity_labels, distribution.squeeze().tolist()))
def calculate_space_type(processed_image):
    """CLIP zero-shot classification over space_type_labels.

    Returns a dict mapping each space-type label to its softmax probability.
    """
    encoded = processor(text=space_type_labels, images=processed_image,
                        return_tensors="pt", padding=True)
    logits = model(**encoded).logits_per_image
    probs = logits.softmax(dim=1).squeeze().tolist()
    return {label: p for label, p in zip(space_type_labels, probs)}
def certainty(processed_image):
    """Score the image against the certainty prompts with CLIP.

    Returns {prompt: probability}; the first entry corresponds to an
    unobstructed outward view, which is what callers read.
    """
    encoded = processor(text=certainty_classes, images=processed_image,
                        return_tensors="pt", padding=True)
    distribution = model(**encoded).logits_per_image.softmax(dim=1)
    return dict(zip(certainty_classes, distribution.squeeze().tolist()))
def views(processed_image):
    """CLIP zero-shot scores of the window-view prompts.

    Returns a dict keyed by the short view_labels, zipped positionally with
    the probabilities of the view_questions prompts.
    """
    encoded = processor(text=view_questions, images=processed_image,
                        return_tensors="pt", padding=True)
    probs = model(**encoded).logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(view_labels, probs))
def calculate_is_render(processed_image):
    """CLIP zero-shot score over the photo-vs-render prompts.

    Returns the combined probability of the two "realistic photo" prompts.
    NOTE(review): despite the name, a HIGHER value therefore means the image
    looks like a real photograph, not a render -- confirm that consumers of
    image_data["is_render"] interpret it this way.
    """
    encoded = processor(text=render_classes, images=processed_image,
                        return_tensors="pt", padding=True)
    probabilities = model(**encoded).logits_per_image.softmax(dim=1).squeeze().tolist()
    scores = dict(zip(render_classes, probabilities))
    return (scores["This is a realistic photo of an interior."]
            + scores["This is a realistic photo of an exterior."])
def generate_answer(image):
    # Full analysis pipeline for one uploaded PIL image; returns a
    # pretty-printed JSON string with space-type distribution, terrace
    # equipment score, luminosity, window/view data and a photo-vs-render
    # score.
    processed_image = image
    # Downscale before inference (the model preprocessors resize again).
    processed_image = processed_image.resize((256, 256))
    # Result skeleton; fields stay None unless the relevant branch runs.
    image_data = {
        "image_context": None,
        "equipment_score": None,
        "luminosity_score": {"score": None,
                             "has_window": {
                                 "score": None,
                                 "answer": None
                             }},
        "view_type": {"views": None, "certainty_score": None}
    }
    space_type_score = calculate_space_type(processed_image)
    max_space_type = max(space_type_score, key=space_type_score.get)
    # NOTE(review): threshold is 0 and softmax probabilities are >= 0, so this
    # gate always passes; space_type itself is never read afterwards, and
    # "patio" is not in space_type_labels, so the remap below is dead code.
    if space_type_score[max_space_type] >= threshold:
        space_type = max_space_type.lower()
        if space_type == "patio":
            space_type = "terrace"
    # The full probability distribution (not just the winner) is reported.
    image_data["image_context"] = space_type_score
    image_results = {}
    if max_space_type == "terrace":
        # Terrace: ask the VQA model each yes/no equipment question and fold
        # the affirmative answers into a weighted score.
        for question in equipment_questions:
            result = vqa_pipeline(processed_image, question, top_k=1)
            answer = result[0]['answer'].lower() == "yes"
            image_results[question] = answer
        equipment_score = calculate_equipment_score(image_results, weights)
        image_data["equipment_score"] = equipment_score
    if max_space_type in ["bedroom", "living room", "kitchen"]:
        # Interior rooms: natural-light score, window check, view analysis.
        luminosity_score = calculate_luminosity_score(processed_image)
        image_data["luminosity_score"]['score'] = luminosity_score['natural_light']
        result = vqa_pipeline(processed_image, "Is there a real window?", top_k=1)
        # has_window is the raw top-1 VQA dict (answer + score); it replaces
        # the nested placeholder built above.
        has_window = result[0]
        image_data["luminosity_score"]["has_window"] = has_window
        view = views(processed_image)
        image_data["view_type"]["views"] = view
        certainty_score = certainty(processed_image)
        # NOTE(review): keeps only the first certainty class ("unobstructed
        # outward view") -- relies on dict insertion order.
        certainty_score = list(certainty_score.values())[0]
        image_data["view_type"]["certainty_score"] = certainty_score
    is_render = calculate_is_render(processed_image)
    image_data["is_render"] = is_render
    return json.dumps(image_data, indent=4)
# --- Gradio UI ---------------------------------------------------------------
# Single image input, JSON-string output; fixes a stray trailing "|" artifact
# that made the launch line a syntax error.
image_input = gr.Image(type="pil", label="Upload Image")
iface = gr.Interface(
    fn=generate_answer,
    inputs=[image_input],
    outputs="text",
    title="Vision intelligence",
    description="Upload an image"
)
iface.launch()