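# Face Analysis App: Gradio UI + OpenCV Haar-cascade face detection, with
# gender/age/emotion estimation delegated to an Ollama vision model.
# Assumed dependencies: pip install gradio opencv-python numpy pillow requests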
import gradio as gr
import cv2
import numpy as np
import time
from PIL import Image
import io
import base64
import requests
import json
import os
# Initialize the Haar-cascade face detector bundled with OpenCV
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Ollama server configuration - replace with a FaceAPI implementation as needed.
# The address can be overridden via the OLLAMA_SERVER environment variable.
OLLAMA_SERVER = os.environ.get("OLLAMA_SERVER", "10.100.20.76:11434")
MODEL_NAME = "llama3.2-vision:latest"
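
# Sketch of the Ollama /api/generate contract this app relies on (non-streaming):
#   request JSON:  {"model": ..., "prompt": ..., "images": [<base64 JPEG>], "stream": false}
#   response JSON: {"model": ..., "response": "Gender: male\nAge: 25-30\nEmotion: happy", "done": true}
# (The "response" text above is a hypothetical example of the format the prompt requests.)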
def encode_image_to_base64(image_array):
    """Convert a numpy image array (RGB, as supplied by Gradio) to the base64 string the Ollama API expects."""
    # Convert the numpy array to a PIL Image; gr.Image already delivers RGB,
    # so no BGR-to-RGB channel swap is needed here
    pil_image = Image.fromarray(image_array)
    # Save the PIL Image to an in-memory JPEG buffer
    buffer = io.BytesIO()
    pil_image.save(buffer, format="JPEG")
    # Encode the JPEG bytes as base64
    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return img_str

def analyze_with_vision_model(image_array):
    """Send an image to the Ollama vision model and parse the response."""
    try:
        # Encode the image to base64
        base64_image = encode_image_to_base64(image_array)

        # Prompt instructing the vision model to answer in a fixed format
        prompt = """
        Analyze this image and provide the following information:
        1. Gender of the person (male or female)
        2. Age range (provide a 5-year range, like 20-25)
        3. Emotion (happy, sad, angry, neutral, surprised, etc.)
        Format your answer exactly like this:
        Gender: [gender]
        Age: [age range]
        Emotion: [emotion]
        """

        # Prepare the API request payload
        payload = {
            "model": MODEL_NAME,
            "prompt": prompt,
            "images": [base64_image],
            "stream": False
        }

        # POST to the Ollama server; json= sets the Content-Type header for us,
        # and the timeout guards against a hung connection
        response = requests.post(
            f"http://{OLLAMA_SERVER}/api/generate",
            json=payload,
            timeout=120
        )
        if response.status_code != 200:
            print(f"Ollama request failed with status {response.status_code}")
            return "Unknown", "Unknown", "Unknown"

        # Parse the response
        result = response.json()
        generated_text = result.get("response", "")

        # Extract the fields by simple line parsing; this assumes the model
        # followed the "Gender:/Age:/Emotion:" format requested in the prompt
        gender = "Not detected"
        age = "Not detected"
        emotion = "Not detected"
        for line in generated_text.split('\n'):
            line = line.strip()
            if line.startswith("Gender:"):
                gender = line.replace("Gender:", "").strip()
            elif line.startswith("Age:"):
                age = line.replace("Age:", "").strip()
            elif line.startswith("Emotion:"):
                emotion = line.replace("Emotion:", "").strip()
        return gender, age, emotion
    except Exception as e:
        print(f"Error analyzing image: {str(e)}")
        return "Error", "Error", "Error"

def detect_and_analyze(input_image):
    """Process the uploaded image: detect the largest face and analyze it."""
    if input_image is None:
        return None, "Please upload an image", "", "", ""

    # Convert to a numpy array if needed (gr.Image may hand back a PIL Image)
    if not isinstance(input_image, np.ndarray):
        try:
            input_image = np.array(input_image)
        except Exception:
            return None, "Error processing image", "", "", ""

    # Make a copy for the annotated display
    display_image = input_image.copy()

    # Convert to grayscale for face detection; Gradio supplies RGB, not BGR
    if len(input_image.shape) == 3:  # Color image
        gray = cv2.cvtColor(input_image, cv2.COLOR_RGB2GRAY)
    else:  # Already grayscale
        gray = input_image

    # Detect faces
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)

    # If faces are detected
    if len(faces) > 0:
        # Keep the largest face, assuming it is the main subject
        largest_face = max(faces, key=lambda rect: rect[2] * rect[3])
        (x, y, w, h) = largest_face

        # Extract the face region
        face_roi = input_image[y:y+h, x:x+w]

        # Draw a rectangle around the face on the annotated copy
        # (only the cropped face is returned to the UI)
        cv2.rectangle(display_image, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Analyze the face
        gender, age, emotion = analyze_with_vision_model(face_roi)
        return face_roi, "Analysis complete!", gender, age, emotion

    return None, "No face detected in the image", "", "", ""

def main():
    # Create the Gradio interface
    with gr.Blocks(title="Face Analysis App") as demo:
        gr.Markdown("# Face Analysis App")
        gr.Markdown("Upload a face image or take a photo to analyze gender, age, and emotion.")
        with gr.Row():
            with gr.Column(scale=3):
                # For old Gradio versions, use the standard Image input
                image_input = gr.Image(label="Face Image Input")
                analyze_btn = gr.Button("Analyze Face")
            with gr.Column(scale=2):
                face_output = gr.Image(label="Detected Face")
                status_output = gr.Textbox(label="Status")
                gender_output = gr.Textbox(label="Gender")
                age_output = gr.Textbox(label="Age Range")
                emotion_output = gr.Textbox(label="Emotion")

        # Connect the components
        analyze_btn.click(
            fn=detect_and_analyze,
            inputs=[image_input],
            outputs=[face_output, status_output, gender_output, age_output, emotion_output]
        )

        gr.Markdown("---")
        gr.Markdown("""
        ### How it works
        1. Upload a photo or take a picture with your webcam
        2. Click "Analyze Face"
        3. The app will detect your face and analyze it
        4. Results will show gender, age range, and emotion

        For best results, ensure good lighting and position your face clearly in the frame.
        """)

    # Launch the app
    demo.launch(share=True)


if __name__ == "__main__":
    main()