import gradio as gr
import cv2
import numpy as np
from PIL import Image
import io
import base64
import requests
import json

# Initialize the Haar cascade face detector bundled with OpenCV
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)

# Ollama server configuration - replace with FaceAPI implementation as needed
OLLAMA_SERVER = "10.100.20.76:11434"
MODEL_NAME = "llama3.2-vision:latest"


def encode_image_to_base64(image_array):
    """Convert an RGB numpy image array to the base64 string required by the Ollama API."""
    # Gradio supplies RGB arrays, so no channel conversion is needed here
    pil_image = Image.fromarray(image_array)

    # Save the PIL image to an in-memory JPEG buffer
    buffer = io.BytesIO()
    pil_image.save(buffer, format="JPEG")

    # Encode the JPEG bytes as base64
    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return img_str


def analyze_with_vision_model(image_array):
    """Send the image to the Ollama vision model and parse gender, age, and emotion from its response."""
    try:
        # Encode the image to base64
        base64_image = encode_image_to_base64(image_array)

        # Prompt the vision model for a fixed, easy-to-parse answer format
        prompt = """
        Analyze this image and provide the following information:
        1. Gender of the person (male or female)
        2. Age range (provide a 5-year range, like 20-25)
        3. Emotion (happy, sad, angry, neutral, surprised, etc.)

        Format your answer exactly like this:
        Gender: [gender]
        Age: [age range]
        Emotion: [emotion]
        """

        # Prepare the API request payload
        payload = {
            "model": MODEL_NAME,
            "prompt": prompt,
            "images": [base64_image],
            "stream": False
        }

        # Make the API request to the Ollama server
        response = requests.post(
            f"http://{OLLAMA_SERVER}/api/generate",
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload)
        )

        if response.status_code != 200:
            return "Unknown", "Unknown", "Unknown"

        # Parse the response
        result = response.json()
        generated_text = result.get("response", "")

        # Extract information with simple line-by-line parsing
        gender = "Not detected"
        age = "Not detected"
        emotion = "Not detected"

        for line in generated_text.split('\n'):
            line = line.strip()
            if line.startswith("Gender:"):
                gender = line.replace("Gender:", "").strip()
            elif line.startswith("Age:"):
                age = line.replace("Age:", "").strip()
            elif line.startswith("Emotion:"):
                emotion = line.replace("Emotion:", "").strip()

        return gender, age, emotion

    except Exception as e:
        print(f"Error analyzing image: {str(e)}")
        return "Error", "Error", "Error"


def detect_and_analyze(input_image):
    """Process the uploaded image: detect the largest face and analyze it."""
    if input_image is None:
        return None, "Please upload an image", "", "", ""

    # Convert to a numpy array if needed
    if not isinstance(input_image, np.ndarray):
        try:
            input_image = np.array(input_image)
        except Exception:
            return None, "Error processing image", "", "", ""

    # Make a copy for display
    display_image = input_image.copy()

    # Convert to grayscale for face detection (Gradio supplies RGB arrays)
    if len(input_image.shape) == 3:  # Color image
        gray = cv2.cvtColor(input_image, cv2.COLOR_RGB2GRAY)
    else:  # Already grayscale
        gray = input_image

    # Detect faces
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)

    # If faces are detected
    if len(faces) > 0:
        # Get the largest face (assuming it's the main subject)
        largest_face = max(faces, key=lambda rect: rect[2] * rect[3])
        (x, y, w, h) = largest_face

        # Extract the face region
        face_roi = input_image[y:y+h, x:x+w]

        # Draw a rectangle around the face
        cv2.rectangle(display_image, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Analyze the face
        gender, age, emotion = analyze_with_vision_model(face_roi)

        return face_roi, "Analysis complete!", gender, age, emotion

    return None, "No face detected in the image", "", "", ""


def main():
    # Create the Gradio interface
    with gr.Blocks(title="Face Analysis App") as demo:
        gr.Markdown("# Face Analysis App")
        gr.Markdown("Upload a face image or take a photo to analyze gender, age, and emotion.")

        with gr.Row():
            with gr.Column(scale=3):
                # For old Gradio versions, use the standard Image input
                image_input = gr.Image(label="Face Image Input")
                analyze_btn = gr.Button("Analyze Face")

            with gr.Column(scale=2):
                face_output = gr.Image(label="Detected Face")
                status_output = gr.Textbox(label="Status")
                gender_output = gr.Textbox(label="Gender")
                age_output = gr.Textbox(label="Age Range")
                emotion_output = gr.Textbox(label="Emotion")

        # Connect the components
        analyze_btn.click(
            fn=detect_and_analyze,
            inputs=[image_input],
            outputs=[face_output, status_output, gender_output, age_output, emotion_output]
        )

        gr.Markdown("---")
        gr.Markdown("""
        ### How it works
        1. Upload a photo or take a picture with your webcam
        2. Click "Analyze Face"
        3. The app will detect your face and analyze it
        4. Results will show gender, age range, and emotion

        For best results, ensure good lighting and position your face clearly in the frame.
        """)

    # Launch the app
    demo.launch(share=True)


if __name__ == "__main__":
    main()