Spaces:

Segizu
/

Face_Recognition

Build error

App Files Files Community

Face_Recognition / app.py

Segizu

no cache embeddings batches

9bc27e3 3 months ago

raw

history blame

4.52 kB

	import gc
	import os
	import pickle
	import shutil
	from pathlib import Path

	import gradio as gr
	import numpy as np
	from datasets import load_dataset, DownloadConfig
	from deepface import DeepFace
	from PIL import Image

	# 🔑 Hugging Face access token, read from the environment.
	# The script stops immediately when it is missing or empty, because the
	# dataset is loaded with this token further down.
	HF_TOKEN = os.environ.get("HF_TOKEN")
	if HF_TOKEN is None or HF_TOKEN == "":
	    raise ValueError("⚠️ Por favor, configura la variable de entorno HF_TOKEN para acceder al dataset privado")

	# 📁 On-disk location of the pickled embeddings cache.
	# The directory is created on start-up if it does not exist yet.
	EMBEDDINGS_DIR = Path("embeddings")
	EMBEDDINGS_FILE = EMBEDDINGS_DIR.joinpath("embeddings.pkl")
	EMBEDDINGS_DIR.mkdir(exist_ok=True)

	# 🧹 Remove any previously cached copy of the dataset before loading it.
	# shutil.rmtree is used instead of shelling out to `rm -rf`: it needs no
	# shell, works on every platform, and ignore_errors=True keeps the
	# "a missing directory is fine" behaviour of the old command.
	shutil.rmtree(
	    Path("~/.cache/huggingface/hub/datasets--Segizu--facial-recognition").expanduser(),
	    ignore_errors=True,
	)

	# ✅ Load the dataset from Hugging Face, forcing a clean download
	download_config = DownloadConfig(
	    force_download=True,
	    token=HF_TOKEN,
	)
	dataset = load_dataset("Segizu/facial-recognition", download_config=download_config)
	# Narrow to the "train" split when the loaded object exposes one.
	if "train" in dataset:
	    dataset = dataset["train"]

	# 🔄 Prepare an image for the Facenet model
	def preprocess_image(img):
	    """Convert *img* to RGB, resize it to 160x160 and return it as a NumPy array.

	    Args:
	        img: Image object providing ``convert`` and ``resize`` (a PIL image
	            in this file).

	    Returns:
	        ``np.array`` of the RGB image after Lanczos resizing to 160x160.
	    """
	    # NOTE(review): the array is RGB; DeepFace appears to document NumPy
	    # inputs as BGR — confirm the expected channel order.
	    target_size = (160, 160)
	    rgb_image = img.convert("RGB")
	    return np.array(rgb_image.resize(target_size, Image.Resampling.LANCZOS))

	def _atomic_pickle_dump(obj, path):
	    """Pickle *obj* to *path* through a temporary file, then rename it into place."""
	    tmp_path = path.with_name(path.name + ".tmp")
	    with open(tmp_path, 'wb') as f:
	        pickle.dump(obj, f)
	    # os.replace swaps the file in one step, so a crash mid-write never
	    # leaves a truncated pickle under the real name.
	    os.replace(tmp_path, path)


	# 📦 Build the embeddings database
	def build_database():
	    """Return the face database, loading it from disk or computing it.

	    A non-empty pickle at ``EMBEDDINGS_FILE`` is returned as-is. Otherwise
	    every image of ``dataset`` is embedded with DeepFace (Facenet) in batches
	    of 10. After each batch the progress is checkpointed to a separate
	    ``.partial`` file; a later run resumes from that checkpoint. Only a
	    finished build is written to ``EMBEDDINGS_FILE``, so an interrupted run is
	    never mistaken for a complete cache.

	    Returns:
	        list of ``(name, image, embedding)`` tuples, where ``name`` is
	        ``"image_<index>"`` with the image's position in the dataset. Images
	        that fail to process are reported and skipped.
	    """
	    # Try to load the finished embeddings from the cache file.
	    if EMBEDDINGS_FILE.exists():
	        print("📂 Cargando embeddings desde el archivo...")
	        # NOTE: pickle.load can execute arbitrary code; this is only acceptable
	        # because the file is produced by this application itself.
	        with open(EMBEDDINGS_FILE, 'rb') as f:
	            cached = pickle.load(f)
	        # An empty cache carries no information (earlier runs could leave one
	        # behind), so fall through and rebuild instead of serving it.
	        if cached:
	            return cached

	    print("🔄 Calculando embeddings (esto puede tomar unos minutos)...")
	    database = []
	    batch_size = 10  # Process 10 images at a time
	    total = len(dataset)
	    total_batches = (total + batch_size - 1) // batch_size
	    partial_file = EMBEDDINGS_FILE.with_name(EMBEDDINGS_FILE.name + ".partial")

	    # Resume from the checkpoint of an interrupted run, if there is one.
	    first_index = 0
	    if partial_file.exists():
	        with open(partial_file, 'rb') as f:
	            first_index, database = pickle.load(f)

	    for start in range(first_index, total, batch_size):
	        # Slicing a Dataset yields a dict of column lists, not a list of rows,
	        # so take the "image" column of the slice and iterate over that.
	        images = dataset[start:start + batch_size]["image"]
	        print(f"📦 Procesando lote {start // batch_size + 1}/{total_batches}")

	        for index, img in enumerate(images, start=start):
	            try:
	                img_processed = preprocess_image(img)
	                embedding = DeepFace.represent(
	                    img_path=img_processed,
	                    model_name="Facenet",
	                    enforce_detection=False
	                )[0]["embedding"]
	                database.append((f"image_{index}", img, embedding))
	                print(f"✅ Procesada imagen {index + 1}/{total}")
	            except Exception as e:
	                # Best effort: one bad image must not abort the whole build.
	                print(f"❌ No se pudo procesar imagen {index}: {e}")

	        # Checkpoint after every batch, together with the next index to process.
	        print("💾 Guardando progreso...")
	        _atomic_pickle_dump((start + len(images), database), partial_file)

	        # Free memory after every batch.
	        gc.collect()

	    # The build is complete: publish the cache and drop the checkpoint.
	    _atomic_pickle_dump(database, EMBEDDINGS_FILE)
	    if partial_file.exists():
	        partial_file.unlink()

	    return database

	# 🔍 Find similar faces
	def find_similar_faces(uploaded_image):
	    """Return the five faces in ``database`` closest to *uploaded_image*.

	    Similarity is ``1 / (1 + d)``, where ``d`` is the Euclidean distance
	    between the Facenet embedding of the query and the stored embedding.

	    Args:
	        uploaded_image: Image to search for. May be ``None`` (presumably when
	            nothing was uploaded), which yields the warning result.

	    Returns:
	        A ``(gallery_items, text_summary)`` tuple. ``gallery_items`` is a list
	        of ``(image, caption)`` pairs ordered from most to least similar;
	        ``text_summary`` holds the same captions, one per line. When no
	        embedding can be computed the result is ``([], warning_message)``.
	    """
	    no_face_message = "⚠ No se detectó un rostro válido en la imagen."

	    # Nothing to embed without an image.
	    if uploaded_image is None:
	        return [], no_face_message

	    try:
	        img_processed = preprocess_image(uploaded_image)
	        # Convert once here instead of once per database row.
	        query_embedding = np.array(
	            DeepFace.represent(
	                img_path=img_processed,
	                model_name="Facenet",
	                enforce_detection=False
	            )[0]["embedding"]
	        )
	    except Exception:
	        # Any failure while embedding is reported to the user as "no valid
	        # face"; KeyboardInterrupt/SystemExit are no longer swallowed.
	        return [], no_face_message

	    # Free memory held by the preprocessed query image.
	    del img_processed
	    gc.collect()

	    similarities = []
	    for name, db_img, embedding in database:
	        dist = np.linalg.norm(query_embedding - np.array(embedding))
	        similarities.append((1 / (1 + dist), name, db_img))

	    # Sort on the score only, so tied scores never fall back to comparing
	    # names or image objects.
	    similarities.sort(key=lambda match: match[0], reverse=True)
	    top_matches = similarities[:5]

	    gallery_items = [
	        (img, f"{name} - Similitud: {sim:.2f}") for sim, name, img in top_matches
	    ]
	    text_summary = "".join(caption + "\n" for _, caption in gallery_items)

	    return gallery_items, text_summary

	# ⚙️ Build (or load) the face database before the UI starts
	print("🚀 Iniciando aplicación...")
	database = build_database()
	print(f"✅ Base de datos cargada con {len(database)} imágenes")

	# 🎛️ Gradio interface: one image in, a gallery plus a text summary out
	_image_input = gr.Image(label="📤 Sube una imagen", type="pil")
	_result_outputs = [
	    gr.Gallery(label="📸 Rostros más similares"),
	    gr.Textbox(label="🧠 Similitud", lines=6),
	]
	demo = gr.Interface(
	    fn=find_similar_faces,
	    inputs=_image_input,
	    outputs=_result_outputs,
	    title="🔍 Buscador de Rostros con DeepFace",
	    description="Sube una imagen y se comparará contra los rostros del dataset alojado en Hugging Face (`Segizu/facial-recognition`).",
	)

	# Start serving the interface.
	demo.launch()