File size: 7,921 Bytes
7ea81c0
3df1923
7ea81c0
 
 
 
c73a989
7ea81c0
 
 
 
cff17c0
7ea81c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991ef04
 
 
7ea81c0
991ef04
7ea81c0
 
 
 
 
 
 
991ef04
 
 
 
 
 
 
 
 
 
 
7ea81c0
 
991ef04
7ea81c0
991ef04
 
 
 
 
 
7ea81c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991ef04
7ea81c0
 
 
 
 
31a8ed3
7ea81c0
 
 
 
 
 
 
 
 
991ef04
 
 
 
 
 
7ea81c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46194a0
7ea81c0
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import json

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel, pipeline, BlipProcessor, BlipForConditionalGeneration

# Load the CLIP model and its processor once at import time; the scoring
# functions below all reuse these module-level objects.
# NOTE(review): "model" and "tokenizer" are passed straight to from_pretrained —
# presumably local directories shipped alongside this file; confirm they exist
# in the deployment.
model = CLIPModel.from_pretrained("model")
processor = CLIPProcessor.from_pretrained("tokenizer")
# No checkpoint is named here, so the pipeline uses whatever default model
# transformers selects for the visual-question-answering task.
vqa_pipeline = pipeline("visual-question-answering")

# Candidate room / space types handed to CLIP as zero-shot text labels.
space_type_labels = [
    "living room",
    "bedroom",
    "kitchen",
    "terrace",
    "closet",
    "bathroom",
    "dining room",
    "office",
    "garage",
    "garden",
    "balcony",
    "attic",
    "hallway",
    "laundry room",
    "gym",
    "playroom",
    "storage room",
    "studio",
    "is_exterior",
    "empty_interior_room",
    "swimming pool",
]

# Weight contributed to the equipment score by each yes/no VQA question.
weights = {
    "Does the image show outdoor furniture?": 0.15,
    "Does the image show a parasol?": 0.05,
    "Does the image show a pergola?": 0.1,
    "Does the image show a grill?": 0.15,
    "Does the image show a heater?": 0.1,
    "Does the image show outdoor lighting?": 0.1,
    "Does the image show planters?": 0.05,
    "Does the image show water features?": 0.1,
    "Does the image show floor coverings?": 0.05,
    "Does the image show decorative items?": 0.05,
    "Does the image show entertainment equipment?": 0.05,
    "Does the image show protective materials?": 0.05,
}

# The questions asked are exactly the weighted ones, in the same order.
equipment_questions = list(weights)

# Text prompts describing each lighting condition; luminosity_labels holds the
# short name reported for the prompt at the same position.
luminosity_classes = [
    'A well-lit room with abundant natural light, showcasing windows or a balcony through which sunlight passes unobstructed.',
    'A room depicted in darkness, where there is minimal or no visible light source.',
    'A room illuminated by artificial light sources such as lamps or ceiling lights.',
]

luminosity_labels = ['natural_light', 'no_light', 'artificial_light']

# Zero-shot prompts for the kind of view shown, and the short label each prompt
# is reported under. views() pairs the two lists by position, so they must stay
# the same length and in the same order. Previously there was no prompt for
# 'indoor view', which shifted the building prompt onto that label and left
# 'building view' unused.
view_questions = [
    "Is this a panoramic view?",
    "Is this a city view?",
    "Is this a view of greenery?",
    "Is this a mountain view?",
    "Is this a view of the sea?",
    "Is this an indoor view?",
    "Is this an exterior view of a building?",
]
view_labels = ['panoramic', 'city', 'greenery', 'mountain', 'sea', 'indoor view', 'building view']

# Prompts used to judge how trustworthy the detected view is. The first entry
# is the one generate_answer reports as the certainty score.
certainty_classes = [
    'Windows, balconies, or terraces with an unobstructed outward view',
    'exterior view of a building or appearance of a house or apartment',
    'Artificial or fake view of any city or sea',
    'View obstructed by objects such as buildings, trees, or other structures',
    'Hallway or interior view with no outdoor visibility',
]

# Alternative wording of the prompts above, kept for reference:
#certainty_classes = ['Windows, balconies, or terraces with an unobstructed outward view','Exterior view appearance of a house or apartment','unreal picture or fake of any city or sea view','view unfree from any obstructive objects such as buildings, trees, or other structures, and ideally seen through windows, balconies, or terraces','hallway']

# Photo-vs-render prompts, one pair for interiors and one pair for exteriors.
render_classes = [
    "This is a realistic photo of an interior.",
    "This is a computer-generated render of an interior.",
    "This is a realistic photo of an exterior.",
    "This is a computer-generated render of an exterior.",
]

# Minimum probability for accepting the top space type.
threshold = 0

def calculate_equipment_score(image_results, weights):
    """Add up the weights of every question that was answered positively.

    Args:
        image_results: Mapping of question -> truthy/falsy answer.
        weights: Mapping of question -> numeric weight. Must contain every
            question whose answer is truthy, otherwise ``KeyError`` is raised.

    Returns:
        The sum of the weights of the positively answered questions
        (``0`` when there are none).
    """
    detected = [question for question, present in image_results.items() if present]
    return sum(weights[question] for question in detected)

def calculate_luminosity_score(processed_image):
    """Score how the scene is lit using CLIP zero-shot classification.

    The image is compared against the prompts in ``luminosity_classes``; the
    softmax over the image-text logits is reported under the short names in
    ``luminosity_labels`` (paired by position).

    Args:
        processed_image: Image accepted by the CLIP processor; the caller in
            this file passes a single PIL image.

    Returns:
        dict mapping each luminosity label to its probability.
    """
    inputs = processor(text=luminosity_classes, images=processed_image, return_tensors="pt", padding=True)
    # Inference only: no_grad avoids building an autograd graph per request.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(luminosity_labels, probabilities))

def calculate_space_type(processed_image):
    """Score which kind of space the image shows using CLIP zero-shot labels.

    The image is compared against every entry of ``space_type_labels`` and the
    image-text logits are turned into probabilities with a softmax.

    Args:
        processed_image: Image accepted by the CLIP processor; the caller in
            this file passes a single PIL image.

    Returns:
        dict mapping each space-type label to its probability.
    """
    inputs = processor(text=space_type_labels, images=processed_image, return_tensors="pt", padding=True)
    # Inference only: no_grad avoids building an autograd graph per request.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(space_type_labels, probabilities))

def certainty(processed_image):
    """Score the image against the view-certainty prompts with CLIP.

    The image is compared against every prompt in ``certainty_classes`` and
    the image-text logits are turned into probabilities with a softmax.

    Args:
        processed_image: Image accepted by the CLIP processor; the caller in
            this file passes a single PIL image.

    Returns:
        dict mapping each prompt in ``certainty_classes`` (in list order) to
        its probability.
    """
    inputs = processor(text=certainty_classes, images=processed_image, return_tensors="pt", padding=True)
    # Inference only: no_grad avoids building an autograd graph per request.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(certainty_classes, probabilities))

def views(processed_image):
    """Score which kind of view the image shows using CLIP zero-shot prompts.

    The image is compared against the prompts in ``view_questions``; each
    probability is reported under the entry of ``view_labels`` at the same
    position.

    NOTE(review): the pairing is purely positional and ``zip`` stops at the
    shorter list, so ``view_questions`` and ``view_labels`` must have the same
    length and order or probabilities end up under the wrong label.

    Args:
        processed_image: Image accepted by the CLIP processor; the caller in
            this file passes a single PIL image.

    Returns:
        dict mapping view labels to probabilities.
    """
    inputs = processor(text=view_questions, images=processed_image, return_tensors="pt", padding=True)
    # Inference only: no_grad avoids building an autograd graph per request.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    return dict(zip(view_labels, probabilities))

def calculate_is_render(processed_image):
    """Return the probability that the image is a computer-generated render.

    The image is scored against the four prompts in ``render_classes``
    (photo/render x interior/exterior). The result is the combined probability
    of the two "computer-generated render" prompts. The previous version summed
    the two "realistic photo" prompts instead, which reported the probability
    of the image being a real photo under the name ``is_render``.

    Args:
        processed_image: Image accepted by the CLIP processor; the caller in
            this file passes a single PIL image.

    Returns:
        float: probability that the image is a render, interior or exterior.
    """
    render_inputs = processor(text=render_classes, images=processed_image, return_tensors="pt", padding=True)
    # Inference only: no_grad avoids building an autograd graph per request.
    with torch.no_grad():
        render_outputs = model(**render_inputs)
    render_probabilities = render_outputs.logits_per_image.softmax(dim=1).squeeze().tolist()
    render_score = dict(zip(render_classes, render_probabilities))
    is_render_prob = (
        render_score["This is a computer-generated render of an interior."]
        + render_score["This is a computer-generated render of an exterior."]
    )
    return is_render_prob

def generate_answer(image):
    """Analyse an uploaded image and return the findings as a JSON string.

    The image is resized to 256x256 and then scored for space type, lighting,
    view type, view certainty and photo-vs-render. When the most likely space
    type is "terrace", each question in ``equipment_questions`` is also put to
    the VQA pipeline and the weighted equipment score is computed.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None`` when
            nothing was uploaded.

    Returns:
        str: JSON document (indent 4) with the keys ``image_context`` (all
        space-type probabilities), ``equipment_score`` (``null`` unless the
        image is classified as a terrace), ``luminosity_score`` (probability
        of natural light), ``view_type`` (``views`` and ``certainty_score``)
        and ``is_render``.

    Raises:
        gr.Error: if no image was provided.
    """
    if image is None:
        # Gradio passes None when the form is submitted without an image;
        # surface a readable message instead of an AttributeError on .resize.
        raise gr.Error("Please upload an image before submitting.")

    processed_image = image.resize((256, 256))

    space_type_score = calculate_space_type(processed_image)
    top_space_type = max(space_type_score, key=space_type_score.get)

    # The old check compared the whole score dict to the string "terrace",
    # which is never equal, so the equipment questions were never asked.
    equipment_score = None
    if top_space_type == "terrace" and space_type_score[top_space_type] >= threshold:
        image_results = {}
        for question in equipment_questions:
            result = vqa_pipeline(processed_image, question, top_k=1)
            image_results[question] = result[0]['answer'].lower() == "yes"
        equipment_score = calculate_equipment_score(image_results, weights)

    luminosity_score = calculate_luminosity_score(processed_image)
    view_scores = views(processed_image)

    # Only the first certainty prompt (unobstructed outward view) is reported.
    certainty_scores = certainty(processed_image)
    certainty_score = certainty_scores[certainty_classes[0]]

    image_data = {
        "image_context": space_type_score,
        "equipment_score": equipment_score,
        "luminosity_score": luminosity_score['natural_light'],
        "view_type": {"views": view_scores, "certainty_score": certainty_score},
        "is_render": calculate_is_render(processed_image),
    }

    return json.dumps(image_data, indent=4)


# Single image input; type="pil" asks Gradio to hand the callback a PIL image.
image_input = gr.Image(type="pil", label="Upload Image")

# One-input, text-output form around generate_answer; the JSON string it
# returns is displayed as plain text.
iface = gr.Interface(
    fn=generate_answer, 
    inputs=[image_input], 
    outputs="text",
    title="Vision intelligence",
    description="Upload an image"
)

# Runs at module level, so the app is launched whenever this file is executed
# or imported.
iface.launch()