suhail0318 committed on
Commit c44f209 · verified · 1 Parent(s): 296a843

Update app.py

Files changed (1)
  1. app.py +87 -202
app.py CHANGED
@@ -1,25 +1,21 @@
-import streamlit as st
+import gradio as gr
 import cv2
-import requests
-import base64
-import json
 import numpy as np
+import time
 from PIL import Image
 import io
-import time
+import base64
+import requests
+import json
+import os
 
-# Page configuration
-st.set_page_config(
-    page_title="Face Analysis with Llama Vision",
-    page_icon="🧠",
-    layout="wide"
-)
+# Initialize face detector
+face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
 
-# Ollama server configuration
+# Ollama server configuration - replace with FaceAPI implementation as needed
 OLLAMA_SERVER = "10.100.20.76:11434"
 MODEL_NAME = "llama3.2-vision:latest"
 
-# Function to encode image for the API
 def encode_image_to_base64(image_array):
     """Convert numpy image array to base64 encoding required by the Ollama API"""
     # Convert numpy array to PIL Image
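The body of `encode_image_to_base64` is untouched by this commit, so the diff elides it between the hunks above and below. For context, a minimal sketch of what plausibly sits in the elided lines, consistent with the first and last lines shown; the BGR-to-RGB conversion and the JPEG format are assumptions, not code from the commit:

```python
# Sketch of the elided body of encode_image_to_base64; color handling is assumed
import base64
import io
import cv2
from PIL import Image

def encode_image_to_base64(image_array):
    """Convert numpy image array to base64 encoding required by the Ollama API"""
    # Convert numpy array to PIL Image (OpenCV frames are BGR; PIL expects RGB)
    pil_image = Image.fromarray(cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB))

    # Serialize to an in-memory JPEG, then base64-encode the bytes
    buffer = io.BytesIO()
    pil_image.save(buffer, format="JPEG")
    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return img_str
```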
@@ -33,7 +29,6 @@ def encode_image_to_base64(image_array):
     img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
     return img_str
 
-# Function to analyze images with the vision model
def analyze_with_vision_model(image_array):
     """Send image to Ollama vision model and analyze the response"""
     try:
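`analyze_with_vision_model` is likewise carried over unchanged, so its body (old lines 40-91, new lines 35-86) is elided. Against Ollama's standard `/api/generate` endpoint, the elided request plausibly resembles the sketch below; the prompt wording and the response parsing are assumptions, and only `OLLAMA_SERVER`, `MODEL_NAME`, and the `(gender, age, emotion)` return shape come from the diff:

```python
# Sketch of the elided Ollama call; uses encode_image_to_base64 from app.py
import json
import requests

OLLAMA_SERVER = "10.100.20.76:11434"
MODEL_NAME = "llama3.2-vision:latest"

def analyze_with_vision_model(image_array):
    """Send image to Ollama vision model and analyze the response (sketch)"""
    try:
        # Ollama's /api/generate accepts base64-encoded images next to the prompt
        payload = {
            "model": MODEL_NAME,
            "prompt": ("Describe the face in this image as JSON with the keys "
                       "gender, age_range, and emotion."),  # assumed prompt
            "images": [encode_image_to_base64(image_array)],
            "stream": False,
        }
        resp = requests.post(f"http://{OLLAMA_SERVER}/api/generate", json=payload, timeout=120)
        resp.raise_for_status()

        # With stream=False, the reply is one JSON object whose "response" field
        # holds the model's full text; how the three attributes are pulled out
        # of that text is assumed here
        result = json.loads(resp.json()["response"])
        return result["gender"], result["age_range"], result["emotion"]

    except Exception as e:
        print(f"Error analyzing image: {str(e)}")
        return "Error", "Error", "Error"
```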
@@ -92,201 +87,91 @@ def analyze_with_vision_model(image_array):
         return gender, age, emotion
 
     except Exception as e:
-        st.error(f"Error analyzing image: {str(e)}")
+        print(f"Error analyzing image: {str(e)}")
         return "Error", "Error", "Error"
 
-# App title and description
-st.title("Face Analysis with Delay & Single Capture")
-st.write("This app waits for 7 seconds to let you position yourself, then detects and analyzes your face.")
-
-# Create layout
-col1, col2 = st.columns([3, 2])
-
-# Webcam display in column 1
-with col1:
-    st.write("### Webcam Feed")
-    webcam_placeholder = st.empty()
-
-# Results display in column 2
-with col2:
-    st.write("### Captured Face")
-    face_placeholder = st.empty()
+def detect_and_analyze(input_image):
+    """Process the uploaded image - detect face and analyze"""
+    if input_image is None:
+        return None, "Please upload an image", "", "", ""
 
-    st.write("### Analysis Results")
-    result_container = st.container()
-    analysis_status = result_container.empty()
-    gender_text = result_container.empty()
-    age_text = result_container.empty()
-    emotion_text = result_container.empty()
-
-# Initialize session state variables
-if 'face_captured' not in st.session_state:
-    st.session_state.face_captured = False
-if 'captured_face' not in st.session_state:
-    st.session_state.captured_face = None
-if 'capture_in_progress' not in st.session_state:
-    st.session_state.capture_in_progress = False
-if 'start_time' not in st.session_state:
-    st.session_state.start_time = None
-
-# Function to reset the app state
-def reset_app():
-    st.session_state.face_captured = False
-    st.session_state.captured_face = None
-    st.session_state.capture_in_progress = False
-    st.session_state.start_time = None
-
-# Create buttons
-col_btn1, col_btn2 = st.columns(2)
-with col_btn1:
-    start_button = st.button("Start Webcam", key="start")
-with col_btn2:
-    reset_button = st.button("Reset", key="reset", on_click=reset_app)
-
-if reset_button:
-    st.rerun()
-
-if start_button or st.session_state.capture_in_progress:
-    # Set capture in progress flag
-    st.session_state.capture_in_progress = True
+    # Convert to numpy array if needed
+    if not isinstance(input_image, np.ndarray):
+        try:
+            input_image = np.array(input_image)
+        except Exception:
+            return None, "Error processing image", "", "", ""
 
-    # Initialize face detector
-    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+    # Make a copy for display
+    display_image = input_image.copy()
 
-    # Open webcam
-    cap = cv2.VideoCapture(0)
+    # Convert to grayscale for face detection (Gradio supplies RGB arrays)
+    if len(input_image.shape) == 3:  # Color image
+        gray = cv2.cvtColor(input_image, cv2.COLOR_RGB2GRAY)
+    else:  # Already grayscale
+        gray = input_image
 
-    # Set the start time if it's not already set
-    if st.session_state.start_time is None:
-        st.session_state.start_time = time.time()
+    # Detect faces
+    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
 
-    try:
-        # If we haven't captured a face yet
-        if not st.session_state.face_captured:
-            # Define the warm-up period (in seconds)
-            warmup_period = 7  # seconds to wait before starting detection
-
-            # Loop until we capture a face
-            while True:
-                # Capture frame from webcam
-                ret, frame = cap.read()
-                if not ret:
-                    st.error("Could not access webcam.")
-                    break
-
-                # Calculate elapsed time
-                elapsed_time = time.time() - st.session_state.start_time
-                remaining_time = max(0, warmup_period - elapsed_time)
-
-                # Make a copy for display
-                display_frame = frame.copy()
-
-                # During warm-up period, just show the webcam feed with countdown
-                if elapsed_time < warmup_period:
-                    # Add countdown text to the frame
-                    cv2.putText(
-                        display_frame,
-                        f"Getting ready... {int(remaining_time)}s",
-                        (50, 50),
-                        cv2.FONT_HERSHEY_SIMPLEX,
-                        1,
-                        (0, 255, 255),
-                        2
-                    )
-
-                    analysis_status.info(f"Please position yourself... Starting detection in {int(remaining_time)} seconds")
-                else:
-                    # After warm-up, start face detection
-                    analysis_status.info("Detecting face...")
-
-                    # Convert to grayscale for face detection
-                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-
-                    # Detect faces
-                    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
-
-                    # If faces are detected
-                    if len(faces) > 0:
-                        # Get the largest face (assuming it's the main subject)
-                        largest_face = max(faces, key=lambda rect: rect[2] * rect[3])
-                        (x, y, w, h) = largest_face
-
-                        # Draw rectangle around the face
-                        cv2.rectangle(display_frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
-
-                        # Extract the face image
-                        face_roi = frame[y:y+h, x:x+w]
-
-                        if face_roi.size > 0:
-                            # Capture the face
-                            st.session_state.captured_face = face_roi.copy()
-                            st.session_state.face_captured = True
-
-                            # Display the captured face
-                            face_rgb = cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB)
-                            face_placeholder.image(face_rgb, caption="Captured Face", channels="RGB")
-
-                            break
-
-                    # Add detecting text to the frame
-                    cv2.putText(
-                        display_frame,
-                        "Detecting face...",
-                        (50, 50),
-                        cv2.FONT_HERSHEY_SIMPLEX,
-                        1,
-                        (0, 255, 0),
-                        2
-                    )
-
-                # Convert BGR to RGB for display
-                display_rgb = cv2.cvtColor(display_frame, cv2.COLOR_BGR2RGB)
-
-                # Update the webcam feed
-                webcam_placeholder.image(display_rgb, caption="Camera Feed", channels="RGB")
-
-                # Short delay to control frame rate
-                time.sleep(0.1)
-
-        # If we've already captured a face, analyze it
-        if st.session_state.face_captured and st.session_state.captured_face is not None:
-            # Display the analysis status
-            analysis_status.info("Analyzing captured face...")
-
-            # Analyze the face
-            gender, age, emotion = analyze_with_vision_model(st.session_state.captured_face)
-
-            # Display results
-            analysis_status.success("Analysis complete!")
-            gender_text.markdown(f"**Gender:** {gender}")
-            age_text.markdown(f"**Age:** {age}")
-            emotion_text.markdown(f"**Emotion:** {emotion}")
-
-            # Reset the capture in progress flag
-            st.session_state.capture_in_progress = False
-
-            # Display a final frame with the detected face
-            if st.session_state.captured_face is not None:
-                face_rgb = cv2.cvtColor(st.session_state.captured_face, cv2.COLOR_BGR2RGB)
-                face_placeholder.image(face_rgb, caption="Captured Face", channels="RGB")
-
-    except Exception as e:
-        st.error(f"An error occurred: {str(e)}")
+    # If faces are detected
+    if len(faces) > 0:
+        # Get the largest face (assuming it's the main subject)
+        largest_face = max(faces, key=lambda rect: rect[2] * rect[3])
+        (x, y, w, h) = largest_face
+
+        # Extract the face image
+        face_roi = input_image[y:y+h, x:x+w]
+
+        # Draw rectangle around the face
+        cv2.rectangle(display_image, (x, y), (x+w, y+h), (0, 255, 0), 2)
+
+        # Analyze the face
+        gender, age, emotion = analyze_with_vision_model(face_roi)
+
+        return face_roi, "Analysis complete!", gender, age, emotion
 
-    finally:
-        # Release webcam when done
-        cap.release()
+    return None, "No face detected in the image", "", "", ""
 
-# Add some information at the bottom
-st.markdown("---")
-st.markdown("""
-### How it works
-1. Click "Start Webcam" to begin
-2. The app will show your webcam feed for 7 seconds to let you position yourself
-3. After the countdown, it will automatically detect and capture your face
-4. The captured face is sent to the Llama 3.2 Vision model for analysis
-5. Results show gender, age range, and emotion
-6. Click "Reset" to start over
+def main():
+    # Create Gradio interface
+    with gr.Blocks(title="Face Analysis App") as demo:
+        gr.Markdown("# Face Analysis App")
+        gr.Markdown("Upload a face image or take a photo to analyze gender, age, and emotion.")
+
+        with gr.Row():
+            with gr.Column(scale=3):
+                # For old Gradio versions, use standard Image input
+                image_input = gr.Image(label="Face Image Input")
+                analyze_btn = gr.Button("Analyze Face")
+
+            with gr.Column(scale=2):
+                face_output = gr.Image(label="Detected Face")
+                status_output = gr.Textbox(label="Status")
+                gender_output = gr.Textbox(label="Gender")
+                age_output = gr.Textbox(label="Age Range")
+                emotion_output = gr.Textbox(label="Emotion")
+
+        # Connect the components
+        analyze_btn.click(
+            fn=detect_and_analyze,
+            inputs=[image_input],
+            outputs=[face_output, status_output, gender_output, age_output, emotion_output]
+        )
+
+        gr.Markdown("---")
+        gr.Markdown("""
+        ### How it works
+        1. Upload a photo or take a picture with your webcam
+        2. Click "Analyze Face"
+        3. The app will detect your face and analyze it
+        4. Results will show gender, age range, and emotion
+
+        For best results, ensure good lighting and position your face clearly in the frame.
+        """)
+
+    # Launch the app
+    demo.launch(share=True)
 
-For best results, ensure good lighting and position your face clearly in the frame.
-""")
+if __name__ == "__main__":
+    main()
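Two notes on the new `detect_and_analyze` path. `detectMultiScale(gray, 1.1, 4)` passes `scaleFactor=1.1` (the search window grows 10% per image-pyramid step) and `minNeighbors=4` (a region must be hit by at least 4 overlapping candidate detections to count as a face), which favors fewer false positives over recall. And because it is a plain function, it can be smoke-tested without launching the Gradio UI; a minimal sketch, where `test.jpg` is a placeholder path rather than a file from this repo:

```python
# Quick local smoke test for detect_and_analyze; run inside app.py's module
# so face_cascade and the Ollama helpers are in scope
import cv2

img = cv2.imread("test.jpg")                # OpenCV loads BGR
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # match the RGB arrays Gradio passes in
face, status, gender, age, emotion = detect_and_analyze(img)
print(status, gender, age, emotion)
```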