import cv2 as cv
import numpy as np
import gradio as gr
from pathlib import Path
from collections import Counter, defaultdict
from huggingface_hub import hf_hub_download
from facial_fer_model import FacialExpressionRecog
from yunet import YuNet
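# Gradio demo: detect faces with YuNet, then classify each face's expression
# with MobileFaceNet; both ONNX models run through OpenCV's DNN module and the
# interface is presented in Dutch.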
# Download the ONNX models from the Hugging Face Hub
FD_MODEL_PATH = hf_hub_download(repo_id="opencv/face_detection_yunet", filename="face_detection_yunet_2023mar.onnx")
FER_MODEL_PATH = hf_hub_download(repo_id="opencv/facial_expression_recognition", filename="facial_expression_recognition_mobilefacenet_2022july.onnx")
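# Run both models on the CPU via OpenCV's built-in DNN backend.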
backend_id = cv.dnn.DNN_BACKEND_OPENCV
target_id = cv.dnn.DNN_TARGET_CPU
fer_model = FacialExpressionRecog(modelPath=FER_MODEL_PATH, backendId=backend_id, targetId=target_id)
detect_model = YuNet(modelPath=FD_MODEL_PATH, backendId=backend_id, targetId=target_id)
# EN -> NL label mapping (lowercase), including variants and common typos
EN_TO_NL = {
"neutral": "neutraal",
"happy": "blij",
"happiness": "blij",
"sad": "verdrietig",
"sadness": "verdrietig",
"surprise": "verrast",
"surprised": "verrast",
"supprised": "verrast", # typo
"surprized": "verrast",
"angry": "boos",
"anger": "boos",
"disgust": "walging",
"fear": "angstig",
"fearful": "angstig",
"fearfull": "angstig", # typo
"contempt": "minachting",
"unknown": "onbekend",
}
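# Normalise a model label to lowercase Dutch; unrecognised labels fall through lowercased.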
def to_dutch_lower(label: str) -> str:
if not label:
return "onbekend"
key = label.strip().lower()
return EN_TO_NL.get(key, key)
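# Session-wide tally of detected emotions; it feeds the live bar chart in the UI.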
emotion_stats = defaultdict(int)
# Confidence helpers
def _format_pct(conf):
if conf is None:
return None
try:
c = float(conf)
except Exception:
return None
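    # Heuristic: scores of at most 1.0 are treated as fractions, larger values as percentages.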
if c <= 1.0:
c *= 100.0
c = max(0.0, min(100.0, c))
return f"{int(round(c))}%"
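# The FER wrapper's infer() return type can vary: a raw score vector, a bare
# class index, or an (index, confidence) pair. Handle each shape defensively.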
def _parse_infer_output(result):
if isinstance(result, np.ndarray):
arr = result
if arr.ndim == 1 and arr.size > 1:
idx = int(np.argmax(arr))
conf = float(arr[idx])
return idx, conf
elif arr.size == 1:
return int(arr.flat[0]), None
else:
try:
idx = int(arr[0])
return idx, None
except Exception:
return 0, None
if isinstance(result, (list, tuple)):
if len(result) >= 2 and isinstance(result[1], (float, np.floating, int, np.integer)):
try:
return int(result[0]), float(result[1])
except Exception:
pass
if len(result) >= 1:
try:
return int(result[0]), None
except Exception:
return 0, None
try:
return int(result), None
except Exception:
return 0, None
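# Draw bounding boxes, Dutch labels (with confidence when available) and the five facial landmarks.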
def visualize(image, det_res, labels, confs):
output = image.copy()
landmark_color = [(255, 0, 0), (0, 0, 255), (0, 255, 0), (255, 0, 255), (0, 255, 255)]
for i, (det, lab) in enumerate(zip(det_res, labels)):
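        # Each YuNet detection row holds 15 values: bbox (x, y, w, h), five landmark points (x, y) and a score.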
bbox = det[0:4].astype(np.int32)
label_en = FacialExpressionRecog.getDesc(lab)
fer_type_str_nl = to_dutch_lower(label_en)
pct = _format_pct(confs[i] if i < len(confs) else None)
txt = f"{fer_type_str_nl}" + (f" {pct}" if pct else "")
cv.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0, 255, 0), 2)
cv.putText(output, txt, (bbox[0], max(0, bbox[1] - 10)), cv.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv.LINE_AA)
landmarks = det[4:14].astype(np.int32).reshape((5, 2))
for idx, landmark in enumerate(landmarks):
cv.circle(output, landmark, 2, landmark_color[idx], 2)
return output
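# Summarise the per-face labels as Markdown: the most frequent emotion becomes
# the heading, followed by per-emotion counts with mean confidence where known.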
def summarize_emotions(labels, confs):
if not labels:
return "## **geen gezicht gedetecteerd**"
names_nl = [to_dutch_lower(FacialExpressionRecog.getDesc(lab)) for lab in labels]
counts = Counter(names_nl)
conf_bucket = defaultdict(list)
for i, name in enumerate(names_nl):
if i < len(confs) and confs[i] is not None:
conf_bucket[name].append(float(confs[i]))
top = counts.most_common(1)[0][0]
parts = []
for name, n in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])):
if conf_bucket[name]:
avg = sum(conf_bucket[name]) / len(conf_bucket[name])
parts.append(f"{name} ({n}, gem. {_format_pct(avg)})")
else:
parts.append(f"{name} ({n})")
details = ", ".join(parts)
return f"# **{top}**\n\n_Gedetecteerde emoties: {details}_"
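# Run the full pipeline on one image: detect faces, classify each expression, return the annotated image plus raw results.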
def process_image(input_image):
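    # Gradio delivers RGB arrays; the OpenCV models expect BGR.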
image = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
h, w, _ = image.shape
detect_model.setInputSize([w, h])
dets = detect_model.infer(image)
    if dets is None or len(dets) == 0:
return cv.cvtColor(image, cv.COLOR_BGR2RGB), [], [], None
labels, confs = [], []
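    # The last element of each detection row is the detection score; the FER model only needs the bbox and landmarks.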
for face_points in dets:
raw = fer_model.infer(image, face_points[:-1])
lab, conf = _parse_infer_output(raw)
labels.append(lab)
confs.append(conf)
output = visualize(image, dets, labels, confs)
return cv.cvtColor(output, cv.COLOR_BGR2RGB), labels, confs, dets
def detect_expression(input_image):
output_img, labels, confs, _ = process_image(input_image)
emotion_md = summarize_emotions(labels, confs)
for lab in labels:
name_nl = to_dutch_lower(FacialExpressionRecog.getDesc(lab))
emotion_stats[name_nl] += 1
stats_plot = draw_bar_chart_cv(emotion_stats)
return output_img, emotion_md, stats_plot
def detect_expression_no_stats(input_image):
output_img, labels, confs, _ = process_image(input_image)
emotion_md = summarize_emotions(labels, confs)
return output_img, emotion_md
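# Render the running statistics as a bar chart drawn with plain OpenCV primitives, so no plotting library is required.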
def draw_bar_chart_cv(stats: dict, width=640, height=320):
img = np.full((height, width, 3), 255, dtype=np.uint8)
cv.putText(img, "Live emotie-statistieken", (12, 28), cv.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2, cv.LINE_AA)
if not stats:
cv.putText(img, "Nog geen statistieken", (12, height//2), cv.FONT_HERSHEY_SIMPLEX, 0.9, (128, 128, 128), 2, cv.LINE_AA)
return cv.cvtColor(img, cv.COLOR_BGR2RGB)
left, right, top, bottom = 60, 20, 50, 40
plot_w = width - left - right
plot_h = height - top - bottom
origin = (left, height - bottom)
cv.line(img, origin, (left + plot_w, height - bottom), (0, 0, 0), 2)
cv.line(img, origin, (left, height - bottom - plot_h), (0, 0, 0), 2)
labels = list(stats.keys())
values = [stats[k] for k in labels]
    max_val = max(values) or 1  # guard the division below when all counts are zero
n = len(labels)
gap = 12
bar_w = max(10, int((plot_w - gap * (n + 1)) / max(1, n)))
for i, (lab, val) in enumerate(zip(labels, values)):
x1 = left + gap + i * (bar_w + gap)
x2 = x1 + bar_w
h_px = int((val / max_val) * (plot_h - 10))
y1 = height - bottom - h_px
y2 = height - bottom - 1
cv.rectangle(img, (x1, y1), (x2, y2), (0, 170, 60), -1)
cv.putText(img, str(val), (x1 + 2, y1 - 6), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 90, 30), 1, cv.LINE_AA)
show_lab = lab if len(lab) <= 12 else lab[:11] + "…"
(tw, th), _ = cv.getTextSize(show_lab, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
tx = x1 + (bar_w - tw) // 2
ty = height - bottom + th + 12
cv.putText(img, show_lab, (tx, ty), cv.FONT_HERSHEY_SIMPLEX, 0.5, (40, 40, 40), 1, cv.LINE_AA)
return cv.cvtColor(img, cv.COLOR_BGR2RGB)
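# Collect example images shipped with the app from an optional examples/ folder.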
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}
EXAMPLES_DIR = Path("examples")
if EXAMPLES_DIR.exists() and EXAMPLES_DIR.is_dir():
    example_paths = [str(p) for p in sorted(EXAMPLES_DIR.iterdir()) if p.suffix.lower() in IMAGE_EXTS]
else:
example_paths = []
example_list = [[p] for p in example_paths]
CACHE_EXAMPLES = bool(example_list)
INFO_HTML = """
<div>
<h3>Hoe werkt deze gezichtsuitdrukking-herkenner?</h3>
<p>Dit model kan automatisch acht emoties herkennen in een foto van een gezicht:</p>
<ul>
<li>neutraal</li>
<li>blij</li>
<li>verdrietig</li>
<li>verrast</li>
<li>boos</li>
<li>walging</li>
<li>angstig</li>
<li>minachting</li>
</ul>
<p>Je kunt hierboven een eigen foto uploaden of een voorbeeld aanklikken. Het systeem doorloopt twee stappen:</p>
<ol>
<li><b>Gezichtsdetectie</b> – met <i>YuNet</i> wordt het gezicht in de afbeelding gelokaliseerd.</li>
<li><b>Emotieherkenning</b> – het gevonden gezicht wordt door <i>MobileFaceNet</i> geanalyseerd om de meest waarschijnlijke emotie te voorspellen.</li>
</ol>
<p>Deze modellen zijn getraind met <b>machine learning</b>. Voor dit type taak <b>is supervised training gebruikt</b>:
er is gewerkt met een grote dataset van gezichten waarbij elke foto een label (zoals “blij” of “boos”) heeft. Tijdens het trainen leert het model welke combinaties van gezichtskenmerken bij welke emotie horen.</p>
<p>Door heel veel voorbeelden te zien, kan het model ook bij nieuwe foto’s een inschatting maken. Het kijkt niet naar één detail, maar naar patronen in het hele gezicht.</p>
</div>
"""
custom_css = """
#emotie-uitslag { color: #16a34a; }
#emotie-uitslag h1, #emotie-uitslag h2, #emotie-uitslag h3 { margin: 0.25rem 0; }
#uitleg-blok {
background: #f3f4f6;
border: 1px solid #e5e7eb;
border-radius: 10px;
padding: 12px 14px;
}
#uitleg-blok h3 { margin: 6px 0 8px 0; }
#uitleg-blok p { margin: 6px 0; }
#uitleg-blok ul { margin: 6px 0 6px 18px; }
#uitleg-blok ol { margin: 6px 0 6px 18px; }
"""
with gr.Blocks(css=custom_css) as demo:
gr.Markdown("## Herkenning van gezichtsuitdrukkingen (FER) met OpenCV DNN")
gr.Markdown("Detecteert gezichten en herkent gezichtsuitdrukkingen met YuNet + MobileFaceNet (ONNX).")
with gr.Row():
with gr.Column():
input_image = gr.Image(type="numpy", label="Afbeelding uploaden")
with gr.Row():
submit_btn = gr.Button("Verstuur", variant="primary")
clear_btn = gr.Button("Wissen")
with gr.Column():
output_image = gr.Image(type="numpy", label="Resultaat gezichtsuitdrukking")
emotion_md = gr.Markdown("## **Nog geen resultaat**", elem_id="emotie-uitslag")
with gr.Row():
with gr.Column():
gr.Markdown("**Voorbeelden (klik om te testen):**")
gr.Examples(
examples=example_list,
inputs=input_image,
outputs=[output_image, emotion_md],
fn=detect_expression_no_stats,
examples_per_page=20,
cache_examples=CACHE_EXAMPLES
)
gr.HTML(INFO_HTML, elem_id="uitleg-blok")
with gr.Column():
stats_image = gr.Image(
label="Statistieken",
type="numpy",
value=draw_bar_chart_cv(emotion_stats)
)
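    # Event wiring: a new upload clears the previous result; the clear button also resets the input and redraws the chart.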
def clear_all_on_new():
return None, "## **Nog geen resultaat**"
def clear_all_button():
return None, None, "## **Nog geen resultaat**", draw_bar_chart_cv(emotion_stats)
input_image.change(fn=clear_all_on_new, outputs=[output_image, emotion_md])
submit_btn.click(fn=detect_expression, inputs=input_image, outputs=[output_image, emotion_md, stats_image])
clear_btn.click(fn=clear_all_button, outputs=[input_image, output_image, emotion_md, stats_image])
if __name__ == "__main__":
demo.launch()