"""Face Analysis App: detect a face with OpenCV, then ask an Ollama vision
model to estimate gender, age range, and emotion, behind a Gradio UI."""
import gradio as gr
import cv2
import numpy as np
from PIL import Image
import io
import base64
import requests

# Initialize face detector
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
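
# CascadeClassifier loads "empty" without raising when the XML file is missing,
# so fail fast here instead of failing silently on the first request
if face_cascade.empty():
    raise RuntimeError("Failed to load haarcascade_frontalface_default.xml")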

# Ollama server configuration - adjust host/port and model for your deployment
# (or replace with a FaceAPI-based implementation as needed)
OLLAMA_SERVER = "10.100.20.76:11434"
MODEL_NAME = "llama3.2-vision:latest"
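
# Seconds to wait for the Ollama server; vision models can be slow on first
# load, so this is deliberately generous - tune it for your deployment
REQUEST_TIMEOUT = 120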

def encode_image_to_base64(image_array):
    """Convert numpy image array to base64 encoding required by the Ollama API"""
    # Gradio supplies RGB arrays, so PIL can consume them directly
    # (no BGR-to-RGB channel swap is needed here)
    pil_image = Image.fromarray(image_array)
    
    # Save PIL Image to bytes buffer
    buffer = io.BytesIO()
    pil_image.save(buffer, format="JPEG")
    
    # Encode bytes to base64
    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return img_str

def analyze_with_vision_model(image_array):
    """Send image to Ollama vision model and analyze the response"""
    try:
        # Encode the image to base64
        base64_image = encode_image_to_base64(image_array)
        
        # Prepare the prompt for the vision model
        prompt = """
        Analyze this image and provide the following information:
        1. Gender of the person (male or female)
        2. Age range (provide a 5-year range, like 20-25)
        3. Emotion (happy, sad, angry, neutral, surprised, etc.)
        
        Format your answer exactly like this:
        Gender: [gender]
        Age: [age range]
        Emotion: [emotion]
        """
        
        # Prepare the API request payload
        payload = {
            "model": MODEL_NAME,
            "prompt": prompt,
            "images": [base64_image],
            "stream": False
        }
        
        # Send the request; requests' json= kwarg serializes the payload and
        # sets the Content-Type header for us
        response = requests.post(
            f"http://{OLLAMA_SERVER}/api/generate",
            json=payload,
            timeout=REQUEST_TIMEOUT,
        )
        
        if response.status_code != 200:
            print(f"Ollama request failed with HTTP {response.status_code}")
            return "Unknown", "Unknown", "Unknown"
        
        # Parse the non-streaming response: one JSON object whose "response"
        # field holds the complete generated text
        result = response.json()
        generated_text = result.get("response", "")
        
        # Extract information using simple parsing
        gender = "Not detected"
        age = "Not detected"
        emotion = "Not detected"
        
        for line in generated_text.split('\n'):
            line = line.strip()
            if line.startswith("Gender:"):
                gender = line.replace("Gender:", "").strip()
            elif line.startswith("Age:"):
                age = line.replace("Age:", "").strip()
            elif line.startswith("Emotion:"):
                emotion = line.replace("Emotion:", "").strip()
        
        return gender, age, emotion
    
    except Exception as e:
        print(f"Error analyzing image: {str(e)}")
        return "Error", "Error", "Error"

def detect_and_analyze(input_image):
    """Process the uploaded image - detect face and analyze"""
    if input_image is None:
        return None, "Please upload an image", "", "", ""
    
    # Convert to numpy array if needed
    if not isinstance(input_image, np.ndarray):
        try:
            input_image = np.array(input_image)
        except Exception:
            return None, "Error processing image", "", "", ""
    
    # Convert to grayscale for face detection (Gradio supplies RGB arrays)
    if len(input_image.shape) == 3:  # Color image
        gray = cv2.cvtColor(input_image, cv2.COLOR_RGB2GRAY)
    else:  # Already grayscale
        gray = input_image
    
    # Detect faces; scaleFactor=1.1 shrinks the search window 10% per pass and
    # minNeighbors=4 suppresses low-confidence detections
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
    
    # If faces are detected
    if len(faces) > 0:
        # Get the largest face (assuming it's the main subject)
        largest_face = max(faces, key=lambda rect: rect[2] * rect[3])
        (x, y, w, h) = largest_face
        
        # Extract the face image
        face_roi = input_image[y:y+h, x:x+w]
        
        # Analyze the face
        gender, age, emotion = analyze_with_vision_model(face_roi)
        
        return face_roi, "Analysis complete!", gender, age, emotion
    
    return None, "No face detected in the image", "", "", ""

def main():
    # Create Gradio interface
    with gr.Blocks(title="Face Analysis App") as demo:
        gr.Markdown("# Face Analysis App")
        gr.Markdown("Upload a face image or take a photo to analyze gender, age, and emotion.")
        
        with gr.Row():
            with gr.Column(scale=3):
                # A plain Image input keeps this compatible with older Gradio versions
                image_input = gr.Image(label="Face Image Input")
                analyze_btn = gr.Button("Analyze Face")
            
            with gr.Column(scale=2):
                face_output = gr.Image(label="Detected Face")
                status_output = gr.Textbox(label="Status")
                gender_output = gr.Textbox(label="Gender")
                age_output = gr.Textbox(label="Age Range")
                emotion_output = gr.Textbox(label="Emotion")
        
        # Connect the components
        analyze_btn.click(
            fn=detect_and_analyze,
            inputs=[image_input],
            outputs=[face_output, status_output, gender_output, age_output, emotion_output]
        )
        
        gr.Markdown("---")
        gr.Markdown("""
        ### How it works
        1. Upload a photo or take a picture with your webcam
        2. Click "Analyze Face"
        3. The app will detect your face and analyze it
        4. Results will show gender, age range, and emotion
        
        For best results, ensure good lighting and position your face clearly in the frame.
        """)
    
    # Launch the app; share=True also creates a temporary public gradio.live URL
    demo.launch(share=True)

if __name__ == "__main__":
    main()
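
# To run: install gradio, opencv-python, numpy, pillow, and requests, make sure
# the Ollama server configured above is reachable and has the vision model
# pulled (e.g. `ollama pull llama3.2-vision`), then execute this file.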