# define the analysis engine
from google import genai
from google.genai import types
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch
import gradio as gr
import pandas as pd
import json
def cv2net(image_path, api_key):
    """Analyze an image with Gemini and map the functional relationships between its entities."""
    # authenticate the Gemini client
    client = genai.Client(api_key=api_key)
    # enable the Google Search grounding tool
    google_search_tool = Tool(
        google_search=GoogleSearch()
    )
    # read the image file as raw bytes
    with open(image_path, 'rb') as f:
        image_data = f.read()
    prompt = """
I want you to carefully analyze the image(s) and map the functional relationships between all identified entities in the image.
Do not ignore small or partially visible items. Collect the following information from the image(s), and DO NOT include items, objects, or things that are not in the image(s):
- Specific object name or person
- Precise functional relationship verb
- Class: object, person, animal, environment, text, brand
- Purpose: primary function or role
- Size: tiny, small, medium, large, huge
- Position: location description
- State: current condition
- Relationship type: spatial, functional, contextual, interactive
- Relationship strength: strong, medium, weak
- Spatial context: detailed spatial description
- Scene context: setting or environment
- Confidence: high, medium, low
- Today's date (YYYY-MM-DD)
Ignore what a person in an image is wearing. Return the results as a single JSON array with exactly the following structure:
```json
[
{
"Vertex1": "specific_object_name_or_person",
"Vertex2": "specific_object_name_or_person",
"Relationship": "precise_functional_relationship_verb",
"Vertex1_class": "Object|Person|Animal|Environment|Text|Brand",
"Vertex1_purpose": "primary_function_or_role",
"Vertex1_size": "small|medium|large|tiny|huge",
"Vertex1_position": "location_description",
"Vertex1_state": "current_condition",
"Vertex2_class": "Object|Person|Animal|Environment|Text|Brand",
"Vertex2_purpose": "primary_function_or_role",
"Vertex2_size": "small|medium|large|tiny|huge",
"Vertex2_position": "location_description",
"Vertex2_state": "current_condition",
"Relationship_type": "spatial|functional|contextual|interactive",
"Relationship_strength": "strong|medium|weak",
"Spatial_context": "detailed_spatial_description",
"Scene_context": "setting_or_environment",
"Confidence": "high|medium|low",
"Date": "today's_date"
}
]
```
Here is an example JSON output:
```json
[
{
"Vertex1": "Man",
"Vertex2": "Bench",
"Relationship": "Sits on",
"Vertex1_class": "Person",
"Vertex1_purpose": "Posing for photo",
"Vertex1_size": "Medium",
"Vertex1_position": "Left foreground",
"Vertex1_state": "Visible",
"Vertex2_class": "Object",
"Vertex2_purpose": "A seat",
"Vertex2_size": "Medium",
"Vertex2_position": "Middle ground",
"Vertex2_state": "Visible",
"Relationship_type": "Functional",
"Relationship_strength": "Strong",
"Spatial_context": "Man is sitting on bench",
"Scene_context": "Outdoor scene in the park",
"Confidence": "High",
"Date": "2025-07-16"
}
]
```
"""
    # send the image and the prompt to Gemini, requesting JSON output
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[types.Part.from_bytes(data=image_data, mime_type="image/jpeg"), prompt],
        config=GenerateContentConfig(
            tools=[google_search_tool],
            response_modalities=["TEXT"],
            response_mime_type="application/json",
        ),
    )
    try:
        # parse the model's JSON response
        json_file = json.loads(response.text)
        # convert the parsed records into a DataFrame
        df = pd.DataFrame(json_file)
        return df
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON for image: {image_path} - {e}")
        return None
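
# Minimal wiring sketch for the Gradio UI this Space presumably exposes; the
# component choices below (filepath image input, password textbox for the key,
# a DataFrame output) are assumptions, not part of cv2net itself.
demo = gr.Interface(
    fn=cv2net,
    inputs=[
        gr.Image(type="filepath", label="Image"),  # cv2net expects a file path
        gr.Textbox(label="Gemini API key", type="password"),
    ],
    outputs=gr.Dataframe(label="Entity relationships"),
)

if __name__ == "__main__":
    demo.launch()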