File size: 8,671 Bytes
7ea81c0
3df1923
7ea81c0
 
 
 
c73a989
7ea81c0
8522307
7ea81c0
 
10b78eb
7ea81c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fc2390
 
 
7ea81c0
991ef04
8fc2390
 
 
 
 
 
7ea81c0
 
bcca343
bb5e3d2
 
 
 
 
bcca343
 
bb5e3d2
56127c0
 
 
 
92a1835
bb5e3d2
 
fe38dd5
991ef04
 
10b78eb
 
 
 
 
7ea81c0
 
991ef04
7ea81c0
991ef04
 
 
 
 
 
7ea81c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991ef04
7ea81c0
 
 
 
 
 
 
 
8fc2390
7ea81c0
8fc2390
7ea81c0
 
 
 
fb7055a
8fc2390
10b78eb
 
 
991ef04
7ea81c0
 
10b78eb
8fc2390
 
 
 
 
 
 
 
 
 
 
 
 
5b81acc
8fc2390
 
 
 
 
 
 
 
 
7ea81c0
e40ee10
 
7ea81c0
46194a0
7ea81c0
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
from PIL import Image
import json
import gradio as gr
import requests
from transformers import CLIPProcessor, CLIPModel, pipeline, BlipProcessor, BlipForConditionalGeneration

# Models are loaded from local directories relative to the working dir:
# "model" (CLIP weights), "tokenizer" (CLIP processor), "vqa" (VQA pipeline).
model = CLIPModel.from_pretrained("model")
processor = CLIPProcessor.from_pretrained("tokenizer")
vqa_pipeline = pipeline("visual-question-answering",model="vqa")

# Candidate labels for zero-shot space-type classification with CLIP.
# NOTE(review): "is_exterior" sits among room names — presumably a catch-all
# exterior bucket; confirm it behaves as intended as a CLIP text prompt.
space_type_labels = ["living room", "bedroom", "kitchen", "terrace", "closet","bathroom", "dining room", "office", "garage", "garden",
    "balcony", "attic", "hallway","gym", "playroom", "storage room", "studio","is_exterior","swimming pool","others"]

# Yes/no questions asked of the VQA model when the image is a terrace.
# Each question doubles as a key into `weights` below, so the strings
# must match exactly.
equipment_questions = [
    "Does the image show outdoor furniture?",
    "Does the image show a parasol?",
    "Does the image show a pergola?",
    "Does the image show a grill?",
    "Does the image show a heater?",
    "Does the image show outdoor lighting?",
    "Does the image show planters?",
    "Does the image show water features?",
    "Does the image show floor coverings?",
    "Does the image show decorative items?",
    "Does the image show entertainment equipment?",
    "Does the image show protective materials?"
]

# Per-question contribution to the terrace equipment score (sums to 1.0
# when every item is present).
weights = {
    "Does the image show outdoor furniture?": 0.15,
    "Does the image show a parasol?": 0.05,
    "Does the image show a pergola?": 0.1,
    "Does the image show a grill?": 0.15,
    "Does the image show a heater?": 0.1,
    "Does the image show outdoor lighting?": 0.1,
    "Does the image show planters?": 0.05,
    "Does the image show water features?": 0.1,
    "Does the image show floor coverings?": 0.05,
    "Does the image show decorative items?": 0.05,
    "Does the image show entertainment equipment?": 0.05,
    "Does the image show protective materials?": 0.05
}

# CLIP prompt sentences for lighting classification; order must stay in
# sync with `luminosity_labels` below (zipped positionally).
luminosity_classes = [
    'A well-lit room with abundant natural light, showcasing windows or a balcony through which sunlight passes unobstructed.',
    'A room depicted in darkness, where there is minimal or no visible light source.',
    'A room illuminated by artificial light sources such as lamps or ceiling lights.'
]

# Earlier, shorter prompt wording kept for reference.
#luminosity_classes = [
#    "A room filled with natural daylight.",
#    "A room lit by artificial lights.",
#    "A dark room with no lights."
#]

# Short labels paired 1:1 with `luminosity_classes` above.
luminosity_labels = ['natural_light', 'no_light', 'artificial_light']

# Earlier question-style prompts kept for reference.
#view_questions = [
    #"Is this a panoramic view?",
#    "Is this a city view?",
#    "Is this a view of greenery?",
#    "Is this a mountain view?",
#    "Is this a view of the sea?"
#]

# CLIP prompt sentences for view classification; order must stay in sync
# with `view_labels` below (zipped positionally). The panoramic prompt is
# intentionally disabled.
view_questions = [
   # "This is a panoramic view, showing a wide expanse of the surroundings.",
    "This is a city view, showing buildings, streets, and urban areas.",
    "This is a view of greenery, including trees, parks, or gardens.",
    "This is a mountain view, showing mountains and hilly landscapes.",
    "This is a view of the sea"
]

# Short labels paired 1:1 with `view_questions` above.
view_labels = ['city', 'greenery', 'mountain', 'sea']

# CLIP prompts for judging how trustworthy the detected view is. The FIRST
# entry ("unobstructed outward view") is the one whose probability is
# reported as the certainty score in `generate_answer`.
certainty_classes = [
    'Windows, balconies, or terraces with an unobstructed outward view',
    'exterior view of a building or appearance of a house or apartment',
    'Artificial or fake view of any city or sea',
    'View obstructed by objects such as buildings, trees, or other structures',
    'Hallway or interior view with no outdoor visibility'
]

# Earlier wording of the certainty prompts kept for reference.
#certainty_classes = ['Windows, balconies, or terraces with an unobstructed outward view','Exterior view appearance of a house or apartment','unreal picture or fake of any city or sea view','view unfree from any obstructive objects such as buildings, trees, or other structures, and ideally seen through windows, balconies, or terraces','hallway']

# CLIP prompts for distinguishing real photos from CG renders, used by
# `calculate_is_render` (which indexes this list by exact string).
render_classes = [
    "This is a realistic photo of an interior.",
    "This is a computer-generated render of an interior.",
    "This is a realistic photo of an exterior.",
    "This is a computer-generated render of an exterior."
]

# NOTE(review): `threshold` is never read anywhere in this file — confirm
# whether it is dead or meant to gate one of the scores.
threshold = 0

def calculate_equipment_score(image_results, weights):
    """Total the weights of every equipment item detected in the image.

    Args:
        image_results: mapping of question text -> bool (True when the VQA
            model answered "yes" for that item).
        weights: mapping of question text -> numeric weight.

    Returns:
        The sum of weights for all present items (0 when none are present).
    """
    total = 0
    for question, present in image_results.items():
        if present:
            total += weights[question]
    return total

def calculate_luminosity_score(processed_image):
    """Zero-shot CLIP classification of the image's lighting.

    Scores the image against the three `luminosity_classes` prompts and
    maps the resulting softmax probabilities onto `luminosity_labels`.

    Args:
        processed_image: PIL image to classify.

    Returns:
        dict mapping 'natural_light' / 'no_light' / 'artificial_light'
        to their probabilities.
    """
    clip_inputs = processor(
        text=luminosity_classes,
        images=processed_image,
        return_tensors="pt",
        padding=True,
    )
    clip_outputs = model(**clip_inputs)
    probabilities = clip_outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(luminosity_labels, probabilities))

def calculate_space_type(processed_image):
    """Zero-shot CLIP classification of the room/space type.

    Scores the image against every label in `space_type_labels` and
    returns the full probability distribution.

    Args:
        processed_image: PIL image to classify.

    Returns:
        dict mapping each space-type label to its softmax probability.
    """
    clip_inputs = processor(
        text=space_type_labels,
        images=processed_image,
        return_tensors="pt",
        padding=True,
    )
    clip_outputs = model(**clip_inputs)
    probabilities = clip_outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(space_type_labels, probabilities))

def certainty(processed_image):
    """Zero-shot CLIP scoring of how trustworthy the outward view is.

    Scores the image against the `certainty_classes` prompts (unobstructed
    view, exterior shot, fake view, obstructed view, no outdoor visibility).

    Args:
        processed_image: PIL image to classify.

    Returns:
        dict mapping each certainty prompt sentence to its probability,
        in the same order as `certainty_classes`.
    """
    clip_inputs = processor(
        text=certainty_classes,
        images=processed_image,
        return_tensors="pt",
        padding=True,
    )
    clip_outputs = model(**clip_inputs)
    probabilities = clip_outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(certainty_classes, probabilities))

def views(processed_image):
    """Zero-shot CLIP classification of the type of view from the image.

    Scores the image against the `view_questions` prompts and maps the
    probabilities onto the short names in `view_labels`.

    Args:
        processed_image: PIL image to classify.

    Returns:
        dict mapping 'city' / 'greenery' / 'mountain' / 'sea' to their
        probabilities.
    """
    clip_inputs = processor(
        text=view_questions,
        images=processed_image,
        return_tensors="pt",
        padding=True,
    )
    clip_outputs = model(**clip_inputs)
    probabilities = clip_outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(view_labels, probabilities))

def calculate_is_render(processed_image):
    """Estimate the probability that the image is a computer-generated render.

    Scores the image against the four `render_classes` prompts with CLIP and
    returns the combined probability of the two "computer-generated render"
    classes (interior + exterior).

    Fix: the previous version summed the two "realistic photo" classes, so
    a value named `is_render_prob` actually held P(real photo) — the exact
    complement of what the name (and the `image_data["is_render"]` caller)
    implies. It now sums the render classes.

    Args:
        processed_image: PIL image to classify.

    Returns:
        float probability in [0, 1] that the image is a render.
    """
    render_inputs = processor(text=render_classes, images=processed_image, return_tensors="pt", padding=True)
    render_outputs = model(**render_inputs)
    render_probs = render_outputs.logits_per_image.softmax(dim=1)
    render_probabilities_list = render_probs.squeeze().tolist()
    render_score = dict(zip(render_classes, render_probabilities_list))
    # Probability mass assigned to the CG prompts, not the realistic ones.
    is_render_prob = (
        render_score["This is a computer-generated render of an interior."]
        + render_score["This is a computer-generated render of an exterior."]
    )
    return is_render_prob

def generate_answer(image):
    """Run the full image-analysis pipeline and return a JSON report.

    Pipeline:
      1. CLIP zero-shot classification of the space type (full distribution
         is reported under "image_context").
      2. If the top space type is "terrace": weighted VQA equipment score.
      3. VQA window check ("Is there a real window?") used both as the
         "validation" flag and as a gate for step 4.
      4. For bedrooms / living rooms / kitchens with a confident window:
         luminosity score, view classification, and view-certainty score.

    Cleanup vs. the previous version: removed a dead branch — the condition
    `space_type_score[max_space_type] >= 0` is always true for softmax
    probabilities, the `space_type` variable it assigned was never read,
    and the "patio" remap was unreachable ("patio" is not a label).

    Args:
        image: PIL image supplied by the Gradio input component.

    Returns:
        str: pretty-printed JSON with keys "image_context", "validation",
        "equipment_score", "luminosity_score" and "view_type".
    """
    processed_image = image

    image_data = {
        "image_context": None,
        "validation": None,
        "equipment_score": None,
        "luminosity_score": {"score": None},
        "view_type": {"views": None, "certainty_score": None},
    }

    # Space-type classification; the full distribution goes into the report.
    space_type_score = calculate_space_type(processed_image)
    max_space_type = max(space_type_score, key=space_type_score.get)
    image_data["image_context"] = space_type_score

    # Terrace images get a weighted equipment score from yes/no VQA answers.
    if max_space_type == "terrace":
        image_results = {}
        for question in equipment_questions:
            result = vqa_pipeline(processed_image, question, top_k=1)
            image_results[question] = result[0]["answer"].lower() == "yes"
        image_data["equipment_score"] = calculate_equipment_score(image_results, weights)

    # Window check: the top VQA answer's confidence drives validation.
    result = vqa_pipeline(processed_image, "Is there a real window?", top_k=1)
    has_window = result[0]
    image_data["validation"] = "pass validation" if has_window["score"] > 0.9 else "No candidate"
    window_exists = has_window["answer"].lower() == "yes" and has_window["score"] > 0.9

    if max_space_type in ["bedroom", "living room", "kitchen"] and window_exists:
        luminosity_score = calculate_luminosity_score(processed_image)
        image_data["luminosity_score"]["score"] = luminosity_score["natural_light"]

        image_data["view_type"]["views"] = views(processed_image)

        # Certainty = probability of the first class (unobstructed outward
        # view); relies on dict preserving the certainty_classes order.
        certainty_score = certainty(processed_image)
        image_data["view_type"]["certainty_score"] = list(certainty_score.values())[0]

    #is_render = calculate_is_render(processed_image)
    #image_data["is_render"] = is_render

    return json.dumps(image_data, indent=4)


# Gradio UI: a single PIL-image input wired to `generate_answer`, which
# returns the JSON report as plain text.
image_input = gr.Image(type="pil", label="Upload Image")

iface = gr.Interface(
    fn=generate_answer, 
    inputs=[image_input], 
    outputs="text",
    title="Vision intelligence",
    description="Upload an image"
)

# Start the Gradio web server (blocking call).
iface.launch()