# Image2Network / CV2Net.py
# define analysis engine
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch
from google.genai import types
from google import genai
import pandas as pd
import mimetypes
import json
def cv2net(image_path, api_key):
    # authenticate the Gemini client
    client = genai.Client(api_key=api_key)

    # enable the Google Search grounding tool
    google_search_tool = Tool(
        google_search=GoogleSearch()
    )

    # read the image bytes and infer the MIME type from the file extension,
    # falling back to JPEG when the type cannot be guessed
    with open(image_path, "rb") as f:
        image_data = f.read()
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
prompt = """
I want you to carefully analyze the image(s) and map the functional relationship between every single identified entity in the image.
Do not ignore small or partially visible items. Collect the following information from the image(s) and DO NOT include items, objects, or things that are not in the image(s):
- Specific object name or person
- Precise functional relationship verb
- Class: object, person, animal, environment, text, brand
- Primary function or role
- Dominant color
- Small, medium, large, tiny, huge
- Material type
- Location description
- Current condition
- Spatial context
- Setting or environment
- Relationship strength: strong, medium, weak
- Spatial context
- Scene context
- Confidence: high, medium, low
- Today's date (YYYY-MM-DD)
Ignore what a person in an image is wearing. Return the results as one JSON file with the following structure exactly:
    ```json
    [
        {
            "Vertex1": "specific_object_name_or_person",
            "Vertex2": "specific_object_name_or_person",
            "Relationship": "precise_functional_relationship_verb",
            "Vertex1_class": "Object|Person|Animal|Environment|Text|Brand",
            "Vertex1_purpose": "primary_function_or_role",
            "Vertex1_size": "tiny|small|medium|large|huge",
            "Vertex1_position": "location_description",
            "Vertex1_state": "current_condition",
            "Vertex2_class": "Object|Person|Animal|Environment|Text|Brand",
            "Vertex2_purpose": "primary_function_or_role",
            "Vertex2_size": "tiny|small|medium|large|huge",
            "Vertex2_position": "location_description",
            "Vertex2_state": "current_condition",
            "Relationship_type": "spatial|functional|contextual|interactive",
            "Relationship_strength": "strong|medium|weak",
            "Spatial_context": "detailed_spatial_description",
            "Scene_context": "setting_or_environment",
            "Confidence": "high|medium|low",
            "Date": "today's_date"
        }
    ]
    ```
    Here is an example JSON output:
    ```json
    [
        {
            "Vertex1": "Man",
            "Vertex2": "Bench",
            "Relationship": "Sits on",
            "Vertex1_class": "Person",
            "Vertex1_purpose": "Posing for photo",
            "Vertex1_size": "Medium",
            "Vertex1_position": "Left foreground",
            "Vertex1_state": "Visible",
            "Vertex2_class": "Object",
            "Vertex2_purpose": "A seat",
            "Vertex2_size": "Medium",
            "Vertex2_position": "Middle ground",
            "Vertex2_state": "Visible",
            "Relationship_type": "Functional",
            "Relationship_strength": "Strong",
            "Spatial_context": "Man is sitting on bench",
            "Scene_context": "Outdoor scene in the park",
            "Confidence": "High",
            "Date": "2025-07-16"
        }
    ]
    ```
    """
    # ask Gemini to extract the entity-relationship records from the image
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[types.Part.from_bytes(data=image_data, mime_type=mime_type), prompt],
        config=GenerateContentConfig(
            tools=[google_search_tool],
            response_modalities=["TEXT"],
            response_mime_type="application/json",
        )
    )
    try:
        # the model sometimes wraps its JSON in markdown fences; strip them before parsing
        text = response.text.strip()
        if text.startswith("```"):
            text = text.strip("`").removeprefix("json").strip()
        # convert the response from a string to JSON
        json_file = json.loads(text)
        # convert the JSON records into a DataFrame, one row per relationship
        df = pd.DataFrame(json_file)
        return df
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON for image {image_path}: {e}")
        return None
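
# cv2net returns an edge list (Vertex1, Vertex2, Relationship, ...), so its output maps
# naturally onto a graph. Below is a minimal usage sketch, not part of the original file:
# it assumes a local test image ("sample.jpg"), a GEMINI_API_KEY environment variable,
# and the networkx package.
if __name__ == "__main__":
    import os

    import networkx as nx

    # hypothetical inputs for a quick smoke test
    edges = cv2net("sample.jpg", os.environ["GEMINI_API_KEY"])
    if edges is not None:
        print(edges.head())

        # build a directed graph from the extracted edge list
        graph = nx.from_pandas_edgelist(
            edges,
            source="Vertex1",
            target="Vertex2",
            edge_attr=["Relationship", "Relationship_type", "Confidence"],
            create_using=nx.DiGraph,
        )
        print(f"{graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges")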