Spaces:

JuanMa360
/

vision_intelligence

Sleeping

App Files Files Community

JuanMa360 committed on May 27, 2024

Commit

7ea81c0

1 Parent(s): 8a23b20

feat: init

Browse files

Files changed (9) hide show

app.py +169 -0
requirements.txt +2 -0
tokenizer/.DS_Store +0 -0
tokenizer/merges.txt +0 -0
tokenizer/preprocessor_config.json +19 -0
tokenizer/special_tokens_map.json +1 -0
tokenizer/tokenizer.json +0 -0
tokenizer/tokenizer_config.json +1 -0
tokenizer/vocab.json +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,169 @@

+from PIL import Image
+import gradio as gr
+import requests
+from transformers import CLIPProcessor, CLIPModel, pipeline, BlipProcessor, BlipForConditionalGeneration
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")  # CLIP model shared by every zero-shot scoring function below
+processor = CLIPProcessor.from_pretrained("tokenizer")  # presumably the local tokenizer/ directory added in this same commit
+vqa_pipeline = pipeline("visual-question-answering")  # no model is named here, so the pipeline falls back to its default checkpoint
+space_type_labels = ["living room", "bedroom", "kitchen", "terrace", "closet","bathroom", "dining room", "office", "garage", "garden",  # candidate labels for room-type classification; also used as the result keys
+    "balcony", "attic", "hallway", "laundry room","home gym", "playroom", "storage room", "studio","is_exterior","empty_interior_room","others"]
+equipment_questions = [  # yes/no questions put to vqa_pipeline, only for images classified as "terrace"
+    "Does the image show outdoor furniture?",
+    "Does the image show a parasol?",
+    "Does the image show a pergola?",
+    "Does the image show a grill?",
+    "Does the image show a heater?",
+    "Does the image show outdoor lighting?",
+    "Does the image show planters?",
+    "Does the image show water features?",
+    "Does the image show floor coverings?",
+    "Does the image show decorative items?",
+    "Does the image show entertainment equipment?",
+    "Does the image show protective materials?"
+]
+weights = {  # contribution of each "yes" answer to the equipment score; keys must match equipment_questions exactly (values sum to 1.0)
+    "Does the image show outdoor furniture?": 0.15,
+    "Does the image show a parasol?": 0.05,
+    "Does the image show a pergola?": 0.1,
+    "Does the image show a grill?": 0.15,
+    "Does the image show a heater?": 0.1,
+    "Does the image show outdoor lighting?": 0.1,
+    "Does the image show planters?": 0.05,
+    "Does the image show water features?": 0.1,
+    "Does the image show floor coverings?": 0.05,
+    "Does the image show decorative items?": 0.05,
+    "Does the image show entertainment equipment?": 0.05,
+    "Does the image show protective materials?": 0.05
+}
+luminosity_classes = [  # CLIP text prompts; order must match luminosity_labels because the two lists are zipped positionally
+    'A picture of a room filled with abundant natural light with a lot or few windows or a great balcony regardless of whether it is night, without objects that prevent the light from passing through.',
+    'a picture of room in the dark',
+    'A picture of a room with Artificial lights like lamps or headlamps'
+]
+luminosity_labels = ['natural_light', 'no_light', 'artificial_light']  # short result keys, one per entry of luminosity_classes
+view_questions = [  # despite the question phrasing these are fed to CLIP as text prompts, not to vqa_pipeline; order must match view_labels
+    "Is this a panoramic view?",
+    "Is this a city view?",
+    "Is this a view of greenery?",
+    "Is this a mountain view?",
+    "Is this a view of the sea?"
+]
+view_labels = ['panoramic', 'city', 'greenery', 'mountain', 'sea']  # short result keys, one per entry of view_questions
+certainty_classes = ['windows, balcony or terrace with a view outwards','Exterior appearance of a house or apartment','unreal image or fake of any view']  # generate_answer reports only the first prompt's probability
+render_classes = ['is_unrealistic_image_render', 'is_image_real']  # calculate_is_render returns the probability of the first prompt
+threshold = 0  # minimum top room-type probability; softmax outputs are never negative, so at 0 the check in generate_answer always passes
+def calculate_equipment_score(image_results, weights):  # Sum the weights of every question whose answer in image_results is truthy.
+    score = sum(weights[question] for question, present in image_results.items() if present)  # a truthy question missing from `weights` raises KeyError; an empty dict yields 0
+    return score
+def calculate_luminosity_score(processed_image):  # Score one image against luminosity_classes with CLIP; returns {luminosity label: probability}.
+    inputs = processor(text=luminosity_classes, images=processed_image, return_tensors="pt", padding=True)  # prompts and image are turned into PyTorch tensors
+    outputs = model(**inputs)  # NOTE(review): forward pass is not wrapped in torch.no_grad(); consider that for inference-only use
+    logits_per_image = outputs.logits_per_image
+    probs = logits_per_image.softmax(dim=1)  # softmax over dim 1, taken here to be the prompt axis, so the scores sum to 1
+    probabilities_list = probs.squeeze().tolist()  # drop size-1 dimensions and convert to a plain list of floats
+    luminosity_score = {class_name: probability for class_name, probability in zip(luminosity_labels, probabilities_list)}  # keys are the short labels, paired by position with the prompts
+    return luminosity_score
+def calculate_space_type(processed_image):  # Score one image against space_type_labels with CLIP; returns {room label: probability}.
+    inputs = processor(text=space_type_labels, images=processed_image, return_tensors="pt", padding=True)  # the raw labels themselves are used as the text prompts
+    outputs = model(**inputs)  # NOTE(review): forward pass is not wrapped in torch.no_grad(); consider that for inference-only use
+    logits_per_image = outputs.logits_per_image
+    probs = logits_per_image.softmax(dim=1)  # softmax over dim 1, taken here to be the label axis, so the scores sum to 1
+    probabilities_list = probs.squeeze().tolist()  # drop size-1 dimensions and convert to a plain list of floats
+    space_type_score = {class_name: probability for class_name, probability in zip(space_type_labels, probabilities_list)}
+    return space_type_score
+def certainty(processed_image):  # Score one image against certainty_classes with CLIP; returns {prompt text: probability}.
+    inputs = processor(text=certainty_classes, images=processed_image, return_tensors="pt", padding=True)
+    outputs = model(**inputs)  # NOTE(review): forward pass is not wrapped in torch.no_grad(); consider that for inference-only use
+    logits_per_image = outputs.logits_per_image
+    probs = logits_per_image.softmax(dim=1)  # softmax over dim 1, taken here to be the prompt axis, so the scores sum to 1
+    probabilities_list = probs.squeeze().tolist()  # drop size-1 dimensions and convert to a plain list of floats
+    is_fake_score = {class_name: probability for class_name, probability in zip(certainty_classes, probabilities_list)}  # keyed by the full prompt strings, in certainty_classes order
+    return is_fake_score
+def views(processed_image):  # Score one image against view_questions with CLIP; returns {view label: probability}.
+    inputs = processor(text=view_questions, images=processed_image, return_tensors="pt", padding=True)  # the questions are used as CLIP text prompts, not sent to the VQA pipeline
+    outputs = model(**inputs)  # NOTE(review): forward pass is not wrapped in torch.no_grad(); consider that for inference-only use
+    logits_per_image = outputs.logits_per_image
+    probs = logits_per_image.softmax(dim=1)  # scores are relative: the softmax makes them sum to 1 even when the image shows no view at all
+    probabilities_list = probs.squeeze().tolist()  # drop size-1 dimensions and convert to a plain list of floats
+    views_score = {class_name: probability for class_name, probability in zip(view_labels, probabilities_list)}  # keys are the short labels, paired by position with the questions
+    return views_score
+def calculate_is_render(processed_image):  # Return CLIP's probability that the image matches 'is_unrealistic_image_render' rather than 'is_image_real'.
+    render_inputs = processor(text=render_classes, images=processed_image, return_tensors="pt", padding=True)
+    render_outputs = model(**render_inputs)  # NOTE(review): forward pass is not wrapped in torch.no_grad(); consider that for inference-only use
+    render_logits = render_outputs.logits_per_image
+    render_probs = render_logits.softmax(dim=1)  # two prompts, so the two probabilities sum to 1
+    render_probabilities_list = render_probs.squeeze().tolist()  # drop size-1 dimensions and convert to a plain list of floats
+    render_score = {class_name: probability for class_name, probability in zip(render_classes, render_probabilities_list)}
+    is_render_prob = render_score['is_unrealistic_image_render']  # only the "render" side is returned to the caller
+    return is_render_prob
+def generate_answer(image):  # Gradio callback: returns a dict with room type, equipment score, natural-light score, view scores, certainty score and render probability for one image.
+    processed_image = image  # no preprocessing is applied; the input is passed through unchanged
+    image_data = {  # result template; fields stay None unless filled in below
+        "image_context": None,
+        "equipment_score": None,
+        "luminosity_score": None,
+        "view_type": {"views": None, "certainty_score": None}
+    }
+    space_type_score = calculate_space_type(processed_image)
+    max_space_type = max(space_type_score, key=space_type_score.get)  # label with the highest probability
+    if space_type_score[max_space_type] >= threshold:  # always true while the module-level threshold is 0
+        space_type = max_space_type.lower()
+        if space_type == "patio":  # NOTE(review): "patio" is not in space_type_labels, so this branch cannot trigger with the current labels
+            space_type = "terrace"
+        image_data["image_context"] = space_type
+    image_results = {}
+    if image_data["image_context"] == "terrace":  # equipment is only assessed for terraces; otherwise equipment_score stays None
+      for question in equipment_questions:
+          result = vqa_pipeline(processed_image, question, top_k=1)  # top_k=1 keeps only the best-scoring answer
+          answer = result[0]['answer'].lower() == "yes"  # True only when that answer is "yes"
+          image_results[question] = answer
+      equipment_score = calculate_equipment_score(image_results, weights)
+      image_data["equipment_score"] = equipment_score
+    luminosity_score = calculate_luminosity_score(processed_image)
+    image_data["luminosity_score"] = luminosity_score['natural_light']  # only the natural-light probability is kept
+    view = views(processed_image)
+    image_data["view_type"]["views"] = view  # full {view label: probability} dict
+    certainty_score = certainty(processed_image)
+    certainty_score = list(certainty_score.values())[0]  # probability of the first certainty prompt (the "view outwards" one)
+    image_data["view_type"]["certainty_score"] = certainty_score
+    is_render = calculate_is_render(processed_image)
+    image_data["is_render"] = is_render  # key added here; it is not part of the initial image_data template
+    return image_data
+image_input = gr.Image(type="pil", label="Upload Image")  # type="pil" requests the upload as a PIL image for the callback
+iface = gr.Interface(
+    fn=generate_answer,
+    inputs=[image_input],
+    outputs="text",  # NOTE(review): generate_answer returns a dict; presumably it is shown as its string form in a text box, so confirm whether a JSON output was intended
+    title="Vision intelligence",
+    description="Upload an image"
+)
+iface.launch()  # runs at import time; there is no __main__ guard

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ transformers
2	+ accelerate

tokenizer/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "crop_size": 224,
+  "do_center_crop": true,
+  "do_normalize": true,
+  "do_resize": true,
+  "feature_extractor_type": "CLIPFeatureExtractor",
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "size": 224
+}

tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|endoftext|>"}

tokenizer/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": "<|endoftext|>", "add_prefix_space": false, "errors": "replace", "do_lower_case": true, "name_or_path": "./clip_ViT_B_32/", "model_max_length": 77}

tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff