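"""Gradio app that analyzes real-estate photos with a CLIP model and a
visual-question-answering pipeline: it estimates the space type, scores
terrace equipment, luminosity, and views, and returns the results as JSON."""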
from PIL import Image
import json
import gradio as gr
import requests
from transformers import CLIPProcessor, CLIPModel, pipeline, BlipProcessor, BlipForConditionalGeneration
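# "model", "tokenizer", and "vqa" are assumed to be local model directories
# bundled with this Space (a CLIP checkpoint, its processor, and a VQA model).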
model = CLIPModel.from_pretrained("model")
processor = CLIPProcessor.from_pretrained("tokenizer")
vqa_pipeline = pipeline("visual-question-answering", model="vqa")
space_type_labels = [
    "living room", "bedroom", "kitchen", "terrace", "closet", "bathroom",
    "dining room", "office", "garage", "garden", "balcony", "attic",
    "hallway", "gym", "playroom", "storage room", "studio", "is_exterior",
    "swimming pool", "others"
]
equipment_questions = [
    "Does the image show outdoor furniture?",
    "Does the image show a parasol?",
    "Does the image show a pergola?",
    "Does the image show a grill?",
    "Does the image show a heater?",
    "Does the image show outdoor lighting?",
    "Does the image show planters?",
    "Does the image show water features?",
    "Does the image show floor coverings?",
    "Does the image show decorative items?",
    "Does the image show entertainment equipment?",
    "Does the image show protective materials?"
]
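# Each yes/no question above is posed to the VQA pipeline when an image is
# classified as a terrace; positive answers are combined using the weights below.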
weights = {
    "Does the image show outdoor furniture?": 0.15,
    "Does the image show a parasol?": 0.05,
    "Does the image show a pergola?": 0.1,
    "Does the image show a grill?": 0.15,
    "Does the image show a heater?": 0.1,
    "Does the image show outdoor lighting?": 0.1,
    "Does the image show planters?": 0.05,
    "Does the image show water features?": 0.1,
    "Does the image show floor coverings?": 0.05,
    "Does the image show decorative items?": 0.05,
    "Does the image show entertainment equipment?": 0.05,
    "Does the image show protective materials?": 0.05
}
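# The weights are expected to sum to 1.0 so the equipment score stays in [0, 1].
assert abs(sum(weights.values()) - 1.0) < 1e-9, "equipment weights should sum to 1.0"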
luminosity_classes = [
    'A well-lit room with abundant natural light, showcasing windows or a balcony through which sunlight passes unobstructed.',
    'A room depicted in darkness, where there is minimal or no visible light source.',
    'A room illuminated by artificial light sources such as lamps or ceiling lights.'
]
#luminosity_classes = [
# "A room filled with natural daylight.",
# "A room lit by artificial lights.",
# "A dark room with no lights."
#]
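# luminosity_labels maps positionally onto luminosity_classes:
# natural light -> 'natural_light', darkness -> 'no_light', artificial -> 'artificial_light'.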
luminosity_labels = ['natural_light', 'no_light', 'artificial_light']
#view_questions = [
#"Is this a panoramic view?",
# "Is this a city view?",
# "Is this a view of greenery?",
# "Is this a mountain view?",
# "Is this a view of the sea?"
#]
view_questions = [
    # "This is a panoramic view, showing a wide expanse of the surroundings.",
    "This is a city view, showing buildings, streets, and urban areas.",
    "This is a view of greenery, including trees, parks, or gardens.",
    "This is a mountain view, showing mountains and hilly landscapes.",
    "This is a view of the sea"
]
view_labels = ['city', 'greenery', 'mountain', 'sea']
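# view_labels maps positionally onto view_questions (city, greenery, mountain, sea);
# keep the two lists in the same order and of the same length.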
certainty_classes = [
    'Windows, balconies, or terraces with an unobstructed outward view',
    'exterior view of a building or appearance of a house or apartment',
    'Artificial or fake view of any city or sea',
    'View obstructed by objects such as buildings, trees, or other structures',
    'Hallway or interior view with no outdoor visibility'
]
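# Only the probability of the first class (an unobstructed outward view) is
# reported as the view certainty score in generate_answer below.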
#certainty_classes = ['Windows, balconies, or terraces with an unobstructed outward view','Exterior view appearance of a house or apartment','unreal picture or fake of any city or sea view','view unfree from any obstructive objects such as buildings, trees, or other structures, and ideally seen through windows, balconies, or terraces','hallway']
render_classes = [
    "This is a realistic photo of an interior.",
    "This is a computer-generated render of an interior.",
    "This is a realistic photo of an exterior.",
    "This is a computer-generated render of an exterior."
]
threshold = 0  # minimum probability for the top space-type prediction (currently accepts everything)
def calculate_equipment_score(image_results, weights):
    # Sum the weights of the equipment items the VQA model answered "yes" to.
    score = sum(weights[question] for question, present in image_results.items() if present)
    return score
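# Illustrative example (hypothetical answers):
# calculate_equipment_score({"Does the image show a grill?": True,
#                            "Does the image show a parasol?": False}, weights) -> 0.15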
def calculate_luminosity_score(processed_image):
    # Zero-shot CLIP classification of the lighting conditions.
    inputs = processor(text=luminosity_classes, images=processed_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    probabilities_list = probs.squeeze().tolist()
    luminosity_score = {class_name: probability for class_name, probability in zip(luminosity_labels, probabilities_list)}
    return luminosity_score
def calculate_space_type(processed_image):
    # Zero-shot CLIP classification of the room / space type.
    inputs = processor(text=space_type_labels, images=processed_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    probabilities_list = probs.squeeze().tolist()
    space_type_score = {class_name: probability for class_name, probability in zip(space_type_labels, probabilities_list)}
    return space_type_score
def certainty(processed_image):
    # Zero-shot CLIP classification of how certain/unobstructed the outward view is.
    inputs = processor(text=certainty_classes, images=processed_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    probabilities_list = probs.squeeze().tolist()
    is_fake_score = {class_name: probability for class_name, probability in zip(certainty_classes, probabilities_list)}
    return is_fake_score
def views(processed_image):
    # Zero-shot CLIP classification of the view type (city, greenery, mountain, sea).
    inputs = processor(text=view_questions, images=processed_image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    probabilities_list = probs.squeeze().tolist()
    views_score = {class_name: probability for class_name, probability in zip(view_labels, probabilities_list)}
    return views_score
def calculate_is_render(processed_image):
    # Zero-shot CLIP classification of photo vs. computer-generated render.
    # Note: the returned value sums the two "realistic photo" probabilities,
    # so a higher value means the image is *less* likely to be a render.
    render_inputs = processor(text=render_classes, images=processed_image, return_tensors="pt", padding=True)
    render_outputs = model(**render_inputs)
    render_logits = render_outputs.logits_per_image
    render_probs = render_logits.softmax(dim=1)
    render_probabilities_list = render_probs.squeeze().tolist()
    render_score = {class_name: probability for class_name, probability in zip(render_classes, render_probabilities_list)}
    is_render_prob = render_score["This is a realistic photo of an interior."] + render_score["This is a realistic photo of an exterior."]
    return is_render_prob
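# The five helpers above repeat the same zero-shot CLIP pattern. A minimal
# generic sketch is shown below for reference; it is not wired into
# generate_answer, and its name and parameters are illustrative only.
def clip_zero_shot(processed_image, prompts, labels=None):
    # Score an image against a list of text prompts and return {label: probability}.
    keys = labels if labels is not None else prompts
    inputs = processor(text=prompts, images=processed_image, return_tensors="pt", padding=True)
    probs = model(**inputs).logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(keys, probs))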
def generate_answer(image):
    processed_image = image
    image_data = {
        "image_context": None,
        "validation": None,
        "equipment_score": None,
        "luminosity_score": {"score": None},
        "view_type": {"views": None, "certainty_score": None}
    }
    # Space type (zero-shot CLIP over space_type_labels).
    space_type_score = calculate_space_type(processed_image)
    max_space_type = max(space_type_score, key=space_type_score.get)
    if space_type_score[max_space_type] >= threshold:
        space_type = max_space_type.lower()
        if space_type == "patio":
            # Normalize "patio" to "terrace".
            space_type = "terrace"
    image_data["image_context"] = space_type_score
    # Equipment score is only computed for terraces, via the VQA pipeline.
    image_results = {}
    if max_space_type == "terrace":
        for question in equipment_questions:
            result = vqa_pipeline(processed_image, question, top_k=1)
            answer = result[0]['answer'].lower() == "yes"
            image_results[question] = answer
        equipment_score = calculate_equipment_score(image_results, weights)
        image_data["equipment_score"] = equipment_score
    # Window check: the image only passes validation if the VQA model is
    # confident (score > 0.9) about its answer to the window question.
    result = vqa_pipeline(processed_image, "Is there a real window?", top_k=1)
    has_window = result[0]
    image_data["validation"] = "pass validation" if has_window['score'] > 0.9 else "No candidate"
    window_exists = has_window["answer"].lower() == "yes" and has_window["score"] > 0.9
    # Luminosity and view scores only for bedrooms, living rooms, and kitchens with a window.
    if max_space_type in ["bedroom", "living room", "kitchen"] and window_exists:
        luminosity_score = calculate_luminosity_score(processed_image)
        image_data["luminosity_score"]['score'] = luminosity_score['natural_light']
        view = views(processed_image)
        image_data["view_type"]["views"] = view
        certainty_score = certainty(processed_image)
        # Probability of the first certainty class: an unobstructed outward view.
        certainty_score = list(certainty_score.values())[0]
        image_data["view_type"]["certainty_score"] = certainty_score
    #is_render = calculate_is_render(processed_image)
    #image_data["is_render"] = is_render
    return json.dumps(image_data, indent=4)
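# Example of the returned JSON (values are illustrative, not real model output):
# {
#     "image_context": {"living room": 0.62, "bedroom": 0.11, ...},
#     "validation": "pass validation",
#     "equipment_score": null,
#     "luminosity_score": {"score": 0.81},
#     "view_type": {"views": {"city": 0.07, "greenery": 0.71, "mountain": 0.12, "sea": 0.10},
#                   "certainty_score": 0.55}
# }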
image_input = gr.Image(type="pil", label="Upload Image")
iface = gr.Interface(
    fn=generate_answer,
    inputs=[image_input],
    outputs="text",
    title="Vision intelligence",
    description="Upload an image"
)
iface.launch()
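# On a Hugging Face Space this file is executed directly; launch() starts the
# Gradio server that serves the interface above.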