Ifeanyi committed (verified)
Commit d83f3b2 · 1 Parent(s): b3399e1

Upload 3 files

Files changed (3):
  1. CV2Net.py +118 -0
  2. Download.py +8 -0
  3. ImageNet.py +27 -0
CV2Net.py ADDED
@@ -0,0 +1,118 @@
+ # define analysis engine
+ from google.genai.types import Tool, GenerateContentConfig, GoogleSearch
+ from google.genai import types
+ from google import genai
+ from io import BytesIO
+ from PIL import Image
+ import pandas as pd
+ import gradio as gr
+ import base64
+ import json
+ import os
+
+
+ def cv2net(image_path, api_key):
+     # authenticate the Gemini client
+     client = genai.Client(api_key=api_key)
+
+     # enable the Google Search tool
+     google_search_tool = Tool(
+         google_search=GoogleSearch()
+     )
+
+     # read the image as raw bytes
+     with open(image_path, 'rb') as f:
+         image_data = f.read()
+
+     prompt = """
+     I want you to carefully analyze the image(s) and map the functional relationship between every pair of identified entities in the image.
+     Do not ignore small or partially visible items. Collect the following information from the image(s), and DO NOT include items, objects, or things that are not in the image(s):
+     - Specific object name or person
+     - Precise functional relationship verb
+     - Class: object, person, animal, environment, text, or brand
+     - Primary function or role
+     - Dominant color
+     - Size: small, medium, large, tiny, or huge
+     - Material type
+     - Location description
+     - Current condition
+     - Relationship type: spatial, functional, contextual, or interactive
+     - Relationship strength: strong, medium, or weak
+     - Spatial context
+     - Scene context (setting or environment)
+     - Confidence: high, medium, or low
+     - Today's date (YYYY-MM-DD)
+     Ignore what a person in an image is wearing. Return the results as a single JSON array with exactly the following structure:
+     ```json
+     [
+       {
+         "Vertex1": "specific_object_name_or_person",
+         "Vertex2": "specific_object_name_or_person",
+         "Relationship": "precise_functional_relationship_verb",
+         "Vertex1_class": "Object|Person|Animal|Environment|Text|Brand",
+         "Vertex1_purpose": "primary_function_or_role",
+         "Vertex1_size": "small|medium|large|tiny|huge",
+         "Vertex1_position": "location_description",
+         "Vertex1_state": "current_condition",
+         "Vertex2_class": "Object|Person|Animal|Environment|Text|Brand",
+         "Vertex2_purpose": "primary_function_or_role",
+         "Vertex2_size": "small|medium|large|tiny|huge",
+         "Vertex2_position": "location_description",
+         "Vertex2_state": "current_condition",
+         "Relationship_type": "spatial|functional|contextual|interactive",
+         "Relationship_strength": "strong|medium|weak",
+         "Spatial_context": "detailed_spatial_description",
+         "Scene_context": "setting_or_environment",
+         "Confidence": "high|medium|low",
+         "Date": "today's_date"
+       }
+     ]
+     ```
+     Here is an example JSON output:
+     ```json
+     [
+       {
+         "Vertex1": "Man",
+         "Vertex2": "Bench",
+         "Relationship": "Sits on",
+         "Vertex1_class": "Person",
+         "Vertex1_purpose": "Posing for photo",
+         "Vertex1_size": "Medium",
+         "Vertex1_position": "Left foreground",
+         "Vertex1_state": "Visible",
+         "Vertex2_class": "Object",
+         "Vertex2_purpose": "A seat",
+         "Vertex2_size": "Medium",
+         "Vertex2_position": "Middle ground",
+         "Vertex2_state": "Visible",
+         "Relationship_type": "Functional",
+         "Relationship_strength": "Strong",
+         "Spatial_context": "Man is sitting on bench",
+         "Scene_context": "Outdoor scene in the park",
+         "Confidence": "High",
+         "Date": "2025-07-16"
+       }
+     ]
+     ```
+     """
+
+     response = client.models.generate_content(
+         model="gemini-2.0-flash",
+         contents=[types.Part.from_bytes(data=image_data, mime_type="image/jpeg"), prompt],
+         config=GenerateContentConfig(
+             tools=[google_search_tool],
+             response_modalities=["TEXT"],
+             response_mime_type="application/json",
+         )
+     )
+
+     try:
+         # parse the model response from string to JSON
+         json_file = json.loads(response.text)
+
+         # convert the JSON into a DataFrame
+         df = pd.DataFrame(json_file)
+         return df
+     except json.JSONDecodeError as e:
+         print(f"Error decoding JSON for image: {image_path} - {e}")
+         return None
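For reference, a minimal sketch of calling cv2net directly, not part of the commit; the image file name and the GEMINI_API_KEY environment variable are assumptions:

```python
# hypothetical usage sketch: analyze one image and inspect the edge list
import os
from CV2Net import cv2net

df = cv2net("park.jpg", api_key=os.environ["GEMINI_API_KEY"])  # assumed inputs
if df is not None:
    print(df[["Vertex1", "Relationship", "Vertex2"]].head())
```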
Download.py ADDED
@@ -0,0 +1,8 @@
+ import pandas as pd
+
+ def save_csv(df):
+     if df is None or df.empty:  # nothing to save
+         return None
+     file_path = "data.csv"
+     df.to_csv(file_path, index=False)
+     return file_path
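save_csv guards against missing or empty results before writing data.csv. A small self-contained sketch, not part of the commit (the sample row mirrors the prompt's example output):

```python
# hypothetical usage sketch for save_csv
import pandas as pd
from Download import save_csv

df = pd.DataFrame([{"Vertex1": "Man", "Relationship": "Sits on", "Vertex2": "Bench"}])
print(save_csv(df))    # -> "data.csv"
print(save_csv(None))  # -> None: nothing to save
```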
ImageNet.py ADDED
@@ -0,0 +1,27 @@
+ from CV2Net import cv2net
+ import gradio as gr
+ import pandas as pd
+
+ def image2net(img_file, api_key):
+     df_list = []  # initialize so a failed call below cannot leave it undefined
+     try:
+         if isinstance(img_file, list):
+             for i in img_file:
+                 df_list.append(cv2net(i, api_key))
+
+         else:
+             df_list = [cv2net(img_file, api_key)]
+
+     except Exception:
+         gr.Info("The model is overloaded. Please try again later!")
+
+     # Filter out None values before concatenating
+     valid_dfs = [df for df in df_list if df is not None]
+
+     if valid_dfs:
+         df = pd.concat(valid_dfs, ignore_index=True)
+         file_path = "network_data.csv"
+         df.to_csv(file_path, index=False)
+         return df, file_path
+     else:
+         return None, None
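The commit does not include the Gradio entry point that wires these functions together, so the following is only a sketch under assumptions: the file name app.py, the component layout, and the labels are all hypothetical.

```python
# app.py -- hypothetical entry point, not part of this commit
import gradio as gr
from ImageNet import image2net

with gr.Blocks() as demo:
    api_key = gr.Textbox(label="Gemini API key", type="password")
    # file_count="multiple" delivers a list of file paths, which matches
    # the list branch inside image2net
    images = gr.File(label="Image(s)", file_count="multiple", type="filepath")
    run = gr.Button("Analyze")
    network = gr.Dataframe(label="Relationship network")
    csv_out = gr.File(label="Download CSV")

    # image2net returns (DataFrame, csv_path), matching the two outputs
    run.click(image2net, inputs=[images, api_key], outputs=[network, csv_out])

demo.launch()
```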