facial_expression_recognition

Running

File size: 11,365 Bytes


import cv2 as cv
import numpy as np
import gradio as gr
from pathlib import Path
from collections import Counter, defaultdict
from huggingface_hub import hf_hub_download

from facial_fer_model import FacialExpressionRecog
from yunet import YuNet

# Download ONNX-modellen
FD_MODEL_PATH = hf_hub_download(repo_id="opencv/face_detection_yunet", filename="face_detection_yunet_2023mar.onnx")
FER_MODEL_PATH = hf_hub_download(repo_id="opencv/facial_expression_recognition", filename="facial_expression_recognition_mobilefacenet_2022july.onnx")

backend_id = cv.dnn.DNN_BACKEND_OPENCV
target_id = cv.dnn.DNN_TARGET_CPU

fer_model = FacialExpressionRecog(modelPath=FER_MODEL_PATH, backendId=backend_id, targetId=target_id)
detect_model = YuNet(modelPath=FD_MODEL_PATH)

# EN -> NL mapping (lowercase) incl. varianten/typo's
EN_TO_NL = {
    "neutral": "neutraal",

    "happy": "blij",
    "happiness": "blij",

    "sad": "verdrietig",
    "sadness": "verdrietig",

    "surprise": "verrast",
    "surprised": "verrast",
    "supprised": "verrast",   # typo
    "surprized": "verrast",

    "angry": "boos",
    "anger": "boos",

    "disgust": "walging",

    "fear": "angstig",
    "fearful": "angstig",
    "fearfull": "angstig",    # typo

    "contempt": "minachting",

    "unknown": "onbekend",
}

def to_dutch_lower(label: str) -> str:
    if not label:
        return "onbekend"
    key = label.strip().lower()
    return EN_TO_NL.get(key, key)

emotion_stats = defaultdict(int)

# Confidence helpers
def _format_pct(conf):
    if conf is None:
        return None
    try:
        c = float(conf)
    except Exception:
        return None
    if c <= 1.0:
        c *= 100.0
    c = max(0.0, min(100.0, c))
    return f"{int(round(c))}%"

def _parse_infer_output(result):
    if isinstance(result, np.ndarray):
        arr = result
        if arr.ndim == 1 and arr.size > 1:
            idx = int(np.argmax(arr))
            conf = float(arr[idx])
            return idx, conf
        elif arr.size == 1:
            return int(arr.flat[0]), None
        else:
            try:
                idx = int(arr[0])
                return idx, None
            except Exception:
                return 0, None

    if isinstance(result, (list, tuple)):
        if len(result) >= 2 and isinstance(result[1], (float, np.floating, int, np.integer)):
            try:
                return int(result[0]), float(result[1])
            except Exception:
                pass
        if len(result) >= 1:
            try:
                return int(result[0]), None
            except Exception:
                return 0, None

    try:
        return int(result), None
    except Exception:
        return 0, None

def visualize(image, det_res, labels, confs):
    output = image.copy()
    landmark_color = [(255, 0, 0), (0, 0, 255), (0, 255, 0), (255, 0, 255), (0, 255, 255)]
    for i, (det, lab) in enumerate(zip(det_res, labels)):
        bbox = det[0:4].astype(np.int32)
        label_en = FacialExpressionRecog.getDesc(lab)
        fer_type_str_nl = to_dutch_lower(label_en)
        pct = _format_pct(confs[i] if i < len(confs) else None)
        txt = f"{fer_type_str_nl}" + (f" {pct}" if pct else "")

        cv.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0, 255, 0), 2)
        cv.putText(output, txt, (bbox[0], max(0, bbox[1] - 10)), cv.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv.LINE_AA)

        landmarks = det[4:14].astype(np.int32).reshape((5, 2))
        for idx, landmark in enumerate(landmarks):
            cv.circle(output, landmark, 2, landmark_color[idx], 2)
    return output

def summarize_emotions(labels, confs):
    if not labels:
        return "## **geen gezicht gedetecteerd**"

    names_nl = [to_dutch_lower(FacialExpressionRecog.getDesc(lab)) for lab in labels]
    counts = Counter(names_nl)
    conf_bucket = defaultdict(list)
    for i, name in enumerate(names_nl):
        if i < len(confs) and confs[i] is not None:
            conf_bucket[name].append(float(confs[i]))

    top = counts.most_common(1)[0][0]
    parts = []
    for name, n in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])):
        if conf_bucket[name]:
            avg = sum(conf_bucket[name]) / len(conf_bucket[name])
            parts.append(f"{name} ({n}, gem. {_format_pct(avg)})")
        else:
            parts.append(f"{name} ({n})")
    details = ", ".join(parts)

    return f"# **{top}**\n\n_Gedetecteerde emoties: {details}_"

def process_image(input_image):
    image = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
    h, w, _ = image.shape
    detect_model.setInputSize([w, h])
    dets = detect_model.infer(image)
    if dets is None:
        return cv.cvtColor(image, cv.COLOR_BGR2RGB), [], [], None
    labels, confs = [], []
    for face_points in dets:
        raw = fer_model.infer(image, face_points[:-1])
        lab, conf = _parse_infer_output(raw)
        labels.append(lab)
        confs.append(conf)
    output = visualize(image, dets, labels, confs)
    return cv.cvtColor(output, cv.COLOR_BGR2RGB), labels, confs, dets

def detect_expression(input_image):
    output_img, labels, confs, _ = process_image(input_image)
    emotion_md = summarize_emotions(labels, confs)
    for lab in labels:
        name_nl = to_dutch_lower(FacialExpressionRecog.getDesc(lab))
        emotion_stats[name_nl] += 1
    stats_plot = draw_bar_chart_cv(emotion_stats)
    return output_img, emotion_md, stats_plot

def detect_expression_no_stats(input_image):
    output_img, labels, confs, _ = process_image(input_image)
    emotion_md = summarize_emotions(labels, confs)
    return output_img, emotion_md

def draw_bar_chart_cv(stats: dict, width=640, height=320):
    img = np.full((height, width, 3), 255, dtype=np.uint8)
    cv.putText(img, "Live emotie-statistieken", (12, 28), cv.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2, cv.LINE_AA)
    if not stats:
        cv.putText(img, "Nog geen statistieken", (12, height//2), cv.FONT_HERSHEY_SIMPLEX, 0.9, (128, 128, 128), 2, cv.LINE_AA)
        return cv.cvtColor(img, cv.COLOR_BGR2RGB)

    left, right, top, bottom = 60, 20, 50, 40
    plot_w = width - left - right
    plot_h = height - top - bottom
    origin = (left, height - bottom)

    cv.line(img, origin, (left + plot_w, height - bottom), (0, 0, 0), 2)
    cv.line(img, origin, (left, height - bottom - plot_h), (0, 0, 0), 2)

    labels = list(stats.keys())
    values = [stats[k] for k in labels]
    max_val = max(values) if max(values) > 0 else 1

    n = len(labels)
    gap = 12
    bar_w = max(10, int((plot_w - gap * (n + 1)) / max(1, n)))

    for i, (lab, val) in enumerate(zip(labels, values)):
        x1 = left + gap + i * (bar_w + gap)
        x2 = x1 + bar_w
        h_px = int((val / max_val) * (plot_h - 10))
        y1 = height - bottom - h_px
        y2 = height - bottom - 1
        cv.rectangle(img, (x1, y1), (x2, y2), (0, 170, 60), -1)
        cv.putText(img, str(val), (x1 + 2, y1 - 6), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 90, 30), 1, cv.LINE_AA)

        show_lab = lab if len(lab) <= 12 else lab[:11] + "…"
        (tw, th), _ = cv.getTextSize(show_lab, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        tx = x1 + (bar_w - tw) // 2
        ty = height - bottom + th + 12
        cv.putText(img, show_lab, (tx, ty), cv.FONT_HERSHEY_SIMPLEX, 0.5, (40, 40, 40), 1, cv.LINE_AA)

    return cv.cvtColor(img, cv.COLOR_BGR2RGB)

IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}
EXAMPLES_DIR = Path("examples")
if EXAMPLES_DIR.exists() and EXAMPLES_DIR.is_dir():
    example_paths = [str(p) for p in sorted(EXAMPLES_DIR.iterdir()) if Path(p).suffix.lower() in IMAGE_EXTS]
else:
    example_paths = []
example_list = [[p] for p in example_paths]
CACHE_EXAMPLES = bool(example_list)

INFO_HTML = """
<div>
  <h3>Hoe werkt deze gezichtsuitdrukking-herkenner?</h3>
  <p>Dit model kan automatisch acht emoties herkennen in een foto van een gezicht:</p>
  <ul>
    <li>neutraal</li>
    <li>blij</li>
    <li>verdrietig</li>
    <li>verrast</li>
    <li>boos</li>
    <li>walging</li>
    <li>angstig</li>
    <li>minachting</li>
  </ul>
  <p>Je kunt hierboven een eigen foto uploaden of een voorbeeld aanklikken. Het systeem doorloopt twee stappen:</p>
  <ol>
    <li><b>Gezichtsdetectie</b> – met <i>YuNet</i> wordt het gezicht in de afbeelding gelokaliseerd.</li>
    <li><b>Emotieherkenning</b> – het gevonden gezicht wordt door <i>MobileFaceNet</i> geanalyseerd om de meest waarschijnlijke emotie te voorspellen.</li>
  </ol>
  <p>Deze modellen zijn getraind met <b>machine learning</b>. Voor dit type taak <b>is supervised training gebruikt</b>:
  er is gewerkt met een grote dataset van gezichten waarbij elke foto een label (zoals “blij” of “boos”) heeft. Tijdens het trainen leert het model welke combinaties van gezichtskenmerken bij welke emotie horen.</p>
  <p>Door heel veel voorbeelden te zien, kan het model ook bij nieuwe foto’s een inschatting maken. Het kijkt niet naar één detail, maar naar patronen in het hele gezicht.</p>
</div>
"""

custom_css = """
#emotie-uitslag { color: #16a34a; }
#emotie-uitslag h1, #emotie-uitslag h2, #emotie-uitslag h3 { margin: 0.25rem 0; }
#uitleg-blok {
  background: #f3f4f6;
  border: 1px solid #e5e7eb;
  border-radius: 10px;
  padding: 12px 14px;
}
#uitleg-blok h3 { margin: 6px 0 8px 0; }
#uitleg-blok p  { margin: 6px 0; }
#uitleg-blok ul { margin: 6px 0 6px 18px; }
#uitleg-blok ol { margin: 6px 0 6px 18px; }
"""

with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## Herkenning van gezichtsuitdrukkingen (FER) met OpenCV DNN")
    gr.Markdown("Detecteert gezichten en herkent gezichtsuitdrukkingen met YuNet + MobileFaceNet (ONNX).")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="numpy", label="Afbeelding uploaden")
            with gr.Row():
                submit_btn = gr.Button("Verstuur", variant="primary")
                clear_btn = gr.Button("Wissen")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="Resultaat gezichtsuitdrukking")
            emotion_md = gr.Markdown("## **Nog geen resultaat**", elem_id="emotie-uitslag")

    with gr.Row():
        with gr.Column():
            gr.Markdown("**Voorbeelden (klik om te testen):**")
            gr.Examples(
                examples=example_list,
                inputs=input_image,
                outputs=[output_image, emotion_md],
                fn=detect_expression_no_stats,
                examples_per_page=20,
                cache_examples=CACHE_EXAMPLES
            )
            gr.HTML(INFO_HTML, elem_id="uitleg-blok")

        with gr.Column():
            stats_image = gr.Image(
                label="Statistieken",
                type="numpy",
                value=draw_bar_chart_cv(emotion_stats)
            )

    def clear_all_on_new():
        return None, "## **Nog geen resultaat**"

    def clear_all_button():
        return None, None, "## **Nog geen resultaat**", draw_bar_chart_cv(emotion_stats)

    input_image.change(fn=clear_all_on_new, outputs=[output_image, emotion_md])
    submit_btn.click(fn=detect_expression, inputs=input_image, outputs=[output_image, emotion_md, stats_image])
    clear_btn.click(fn=clear_all_button, outputs=[input_image, output_image, emotion_md, stats_image])

if __name__ == "__main__":
    demo.launch()