Files changed (7)
  1. .gitattributes +0 -1
  2. .gitignore +0 -13
  3. README.md +6 -24
  4. app.py +62 -338
  5. metadata.csv +0 -0
  6. metadata.py +0 -23
  7. requirements.txt +3 -6
.gitattributes CHANGED
@@ -34,4 +34,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.jpg filter=lfs diff=lfs merge=lfs -text
-spaces::accelerator gpu

.gitignore DELETED
@@ -1,13 +0,0 @@
-.env
-.venv
-.env.local
-.env.development.local
-.env.test.local
-.env.production.local
-
-/venv
-/embeddings
-/batches
-/metadata.csv
-/metadata.csv.gz
-/metadata.csv.gz.part

README.md CHANGED
@@ -1,30 +1,12 @@
 ---
-title: Facial Recognition App
-emoji: 🔍
-colorFrom: blue
-colorTo: purple
+title: Face Recognition
+emoji:
+colorFrom: red
+colorTo: blue
 sdk: gradio
-sdk_version: 5.29.0
+sdk_version: 5.23.0
 app_file: app.py
 pinned: false
 ---
 
-# Facial Recognition App
-
-This application uses DeepFace and Facenet for facial recognition and similarity matching.
-
-## Hardware Requirements
-- GPU: Required
-- CPU: 4+ cores recommended
-- RAM: 8GB+ recommended
-
-## Environment Setup
-The application requires the following key dependencies:
-- deepface
-- gradio
-- huggingface_hub
-- datasets
-- Pillow
-- numpy
-
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED
@@ -1,359 +1,83 @@
-import os
 import numpy as np
 from PIL import Image
 import gradio as gr
 from deepface import DeepFace
-from datasets import load_dataset
-import pickle
-from io import BytesIO
-from huggingface_hub import upload_file, hf_hub_download, list_repo_files
-from pathlib import Path
-import gc
-import requests
-import time
-import shutil
-import tarfile
-import tensorflow as tf
-
-# GPU configuration
-print("Available GPU devices:", tf.config.list_physical_devices('GPU'))
-
-# Configure GPU memory
-gpus = tf.config.list_physical_devices('GPU')
-if gpus:
-    try:
-        # Allow memory growth
-        for gpu in gpus:
-            tf.config.experimental.set_memory_growth(gpu, True)
-        print("✅ GPU configured correctly")
-
-        # Use only the GPU
-        tf.config.set_visible_devices(gpus[0], 'GPU')
-        print(f"✅ Using GPU: {gpus[0]}")
-    except RuntimeError as e:
-        print(f"⚠️ Error configuring GPU: {e}")
-else:
-    print("⚠️ No GPU detected, using CPU")
-
-# Enable mixed precision
-tf.keras.mixed_precision.set_global_policy('mixed_float16')
-
-# 🔁 Clean temporary storage if it exists
-def clean_temp_dirs():
-    print("🧹 Cleaning temporary folders...")
-    for folder in ["embeddings", "batches"]:
-        path = Path(folder)
-        if path.exists() and path.is_dir():
-            shutil.rmtree(path)
-            print(f"✅ Folder removed: {folder}")
-        path.mkdir(exist_ok=True)
-
-clean_temp_dirs()
-
-# 📁 Parameters
-DATASET_ID = "Segizu/facial-recognition-preview"
-EMBEDDINGS_SUBFOLDER = "embeddings"
-LOCAL_EMB_DIR = Path("embeddings")
-LOCAL_EMB_DIR.mkdir(exist_ok=True)
-HF_TOKEN = os.getenv("HF_TOKEN")
-headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
-
-# 💾 Settings
-MAX_TEMP_STORAGE_GB = 40
-UPLOAD_EVERY = 50
-
-def get_folder_size(path):
-    total = 0
-    for dirpath, _, filenames in os.walk(path):
-        for f in filenames:
-            fp = os.path.join(dirpath, f)
-            total += os.path.getsize(fp)
-    return total / (1024 ** 3)
-
-def preprocess_image(img: Image.Image) -> np.ndarray:
-    # Convert to RGB if needed
-    if img.mode != 'RGB':
-        img = img.convert('RGB')
-
-    # Read the EXIF orientation if present
-    try:
-        exif = img._getexif()
-        if exif is not None:
-            orientation = exif.get(274)  # 274 is the EXIF orientation tag
-            if orientation is not None:
-                # Rotate the image according to the EXIF orientation
-                if orientation == 3:
-                    img = img.rotate(180, expand=True)
-                elif orientation == 6:
-                    img = img.rotate(270, expand=True)
-                elif orientation == 8:
-                    img = img.rotate(90, expand=True)
-    except:
-        pass  # No EXIF data or read error; continue with the original image
-
-    # Try to detect the face orientation
-    try:
-        # Convert to a numpy array for DeepFace
-        img_array = np.array(img)
-        # Detect faces on the GPU
-        face_objs = DeepFace.extract_faces(
-            img_path=img_array,
-            target_size=(160, 160),
-            detector_backend='retinaface',
-            enforce_detection=False
-        )
-
-        if face_objs and len(face_objs) > 0:
-            # If a face is detected, use the detected crop
-            img_array = face_objs[0]['face']
-            return img_array
-    except:
-        pass  # If detection fails, continue with normal processing
-
-    # No face detected (or detection failed): resize the original image
-    img_resized = img.resize((160, 160), Image.Resampling.LANCZOS)
+from datasets import load_dataset, DownloadConfig
+import os
+os.system("rm -rf ~/.cache/huggingface/hub/datasets--Segizu--dataset_faces")
+
+# Load the Hugging Face dataset, forcing a clean download
+download_config = DownloadConfig(force_download=True)
+dataset = load_dataset("Segizu/dataset_faces", download_config=download_config)
+if "train" in dataset:
+    dataset = dataset["train"]
+
+# 🔄 Preprocess image for Facenet
+def preprocess_image(img):
+    img_rgb = img.convert("RGB")
+    img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
     return np.array(img_resized)
 
-# Load the CSV from the dataset
-dataset = load_dataset(
-    "csv",
-    data_files="metadata.csv",
-    split="train",
-    column_names=["image"],
-    header=0
-)
+# 📦 Build the embeddings database
 def build_database():
-    print(f"📊 Current temporary storage usage at START: {get_folder_size('.'):.2f} GB")
-    print("🔄 Generating embeddings...")
-    batch_size = 10
-    archive_batch_size = 50
-    batch_files = []
-    batch_index = 0
-    ARCHIVE_DIR = Path("batches")
-    ARCHIVE_DIR.mkdir(exist_ok=True)
-
-    for i in range(0, len(dataset), batch_size):
-        batch = dataset[i:i + batch_size]
-        print(f"📦 Batch {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
-
-        for j in range(len(batch["image"])):
-            image_url = batch["image"][j]
-
-            if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
-                print(f"⚠️ Skipping {i + j} - invalid URL: {image_url}")
-                continue
-
-            name = f"image_{i + j}"
-            filename = LOCAL_EMB_DIR / f"{name}.pkl"
-
-            # Check whether it was already uploaded
-            try:
-                hf_hub_download(
-                    repo_id=DATASET_ID,
-                    repo_type="dataset",
-                    filename=f"{EMBEDDINGS_SUBFOLDER}/batch_{batch_index:03}.tar.gz",
-                    token=HF_TOKEN
-                )
-                print(f"⏩ Already exists remotely: {name}.pkl")
-                continue
-            except:
-                pass
-
-            try:
-                response = requests.get(image_url, headers=headers, timeout=10)
-                response.raise_for_status()
-                img = Image.open(BytesIO(response.content)).convert("RGB")
-
-                img_processed = preprocess_image(img)
-                embedding = DeepFace.represent(
-                    img_path=img_processed,
-                    model_name="Facenet",
-                    enforce_detection=False
-                )[0]["embedding"]
-
-                with open(filename, "wb") as f:
-                    pickle.dump({"name": name, "img": img, "embedding": embedding}, f)
-
-                batch_files.append(filename)
-                del img_processed
-                gc.collect()
-
-                if len(batch_files) >= archive_batch_size or get_folder_size(".") > MAX_TEMP_STORAGE_GB:
-                    archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
-                    with tarfile.open(archive_path, "w:gz") as tar:
-                        for file in batch_files:
-                            tar.add(file, arcname=file.name)
-
-                    print(f"📦 Packed: {archive_path}")
-
-                    upload_file(
-                        path_or_fileobj=str(archive_path),
-                        path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{archive_path.name}",
-                        repo_id=DATASET_ID,
-                        repo_type="dataset",
-                        token=HF_TOKEN
-                    )
-                    print(f"✅ Uploaded: {archive_path.name}")
-
-                    for f in batch_files:
-                        f.unlink()
-                    archive_path.unlink()
-                    print("🧹 Cleanup completed after upload")
-
-                    batch_files = []
-                    batch_index += 1
-                    time.sleep(2)
-                    print(f"📊 Current usage at END: {get_folder_size('.'):.2f} GB")
-
-            except Exception as e:
-                print(f"❌ Error on {name}: {e}")
-                continue
-
-    if batch_files:
-        archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
-        with tarfile.open(archive_path, "w:gz") as tar:
-            for file in batch_files:
-                tar.add(file, arcname=file.name)
-
-        print(f"📦 Final pack: {archive_path}")
-
-        upload_file(
-            path_or_fileobj=str(archive_path),
-            path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{archive_path.name}",
-            repo_id=DATASET_ID,
-            repo_type="dataset",
-            token=HF_TOKEN
-        )
-
-        for f in batch_files:
-            f.unlink()
-        archive_path.unlink()
-        print("✅ Final upload and cleanup")
-
-# 🔍 Search for similar faces
-def find_similar_faces(uploaded_image: Image.Image):
-    if uploaded_image is None:
-        return [], "⚠ Please upload an image first"
-
-    try:
-        print("🔄 Processing input image...")
-        # Convert to RGB if needed
-        if uploaded_image.mode != 'RGB':
-            uploaded_image = uploaded_image.convert('RGB')
-
-        # Show the image dimensions
-        print(f"📐 Image dimensions: {uploaded_image.size}")
-
-        img_processed = preprocess_image(uploaded_image)
-        print("✅ Image preprocessed successfully")
-
-        # Try first with enforce_detection=True
+    database = []
+    for i, item in enumerate(dataset):
         try:
-            query_embedding = DeepFace.represent(
+            img = item["image"]
+            img_processed = preprocess_image(img)
+            embedding = DeepFace.represent(
                 img_path=img_processed,
                 model_name="Facenet",
-                enforce_detection=True,
-                detector_backend='retinaface'
+                enforce_detection=False
             )[0]["embedding"]
-            print("Face detected with enforce_detection=True")
+            database.append((f"image_{i}", img, embedding))
         except Exception as e:
-            print(f"Could not detect a face with enforce_detection=True, retrying with False: {str(e)}")
-            # If it fails, retry with enforce_detection=False
-            query_embedding = DeepFace.represent(
-                img_path=img_processed,
-                model_name="Facenet",
-                enforce_detection=False,
-                detector_backend='retinaface'
-            )[0]["embedding"]
-            print("✅ Embedding generated with enforce_detection=False")
-
-        del img_processed
-        gc.collect()
-
-    except Exception as e:
-        print(f"❌ Error processing image: {str(e)}")
-        return [], f"⚠ Error processing image: {str(e)}"
-
-    similarities = []
-    print("🔍 Searching for matches in the database...")
+            print(f"Could not process image {i}: {e}")
+    return database
 
+# 🔍 Find similar faces
+def find_similar_faces(uploaded_image):
     try:
-        embedding_files = [
-            f for f in list_repo_files(DATASET_ID, repo_type="dataset", token=HF_TOKEN)
-            if f.startswith(f"{EMBEDDINGS_SUBFOLDER}/") and f.endswith(".tar.gz")
-        ]
-        print(f"📁 Found {len(embedding_files)} embedding files")
-    except Exception as e:
-        print(f"❌ Error fetching files: {str(e)}")
-        return [], f"⚠ Error fetching files: {str(e)}"
-
-    # Process in batches for better performance
-    batch_size = 10
-    for i in range(0, len(embedding_files), batch_size):
-        batch_files = embedding_files[i:i + batch_size]
-        print(f"📦 Processing batch {i//batch_size + 1}/{(len(embedding_files) + batch_size - 1)//batch_size}")
-
-        for file_path in batch_files:
-            try:
-                file_bytes = requests.get(
-                    f"https://huggingface.co/datasets/{DATASET_ID}/resolve/main/{file_path}",
-                    headers=headers,
-                    timeout=30
-                ).content
-
-                # Write the tar.gz to a temporary file
-                temp_archive = Path("temp_archive.tar.gz")
-                with open(temp_archive, "wb") as f:
-                    f.write(file_bytes)
-
-                # Extract the contents
-                with tarfile.open(temp_archive, "r:gz") as tar:
-                    tar.extractall(path="temp_extract")
-
-                # Process each .pkl file in the tar
-                for pkl_file in Path("temp_extract").glob("*.pkl"):
-                    with open(pkl_file, "rb") as f:
-                        record = pickle.load(f)
-
-                    name = record["name"]
-                    img = record["img"]
-                    emb = record["embedding"]
-
-                    dist = np.linalg.norm(np.array(query_embedding) - np.array(emb))
-                    sim_score = 1 / (1 + dist)
-                    similarities.append((sim_score, name, np.array(img)))
-
-                # Clean up temporary files
-                shutil.rmtree("temp_extract")
-                temp_archive.unlink()
-
-            except Exception as e:
-                print(f"⚠ Error processing {file_path}: {e}")
-                continue
+        img_processed = preprocess_image(uploaded_image)
+        query_embedding = DeepFace.represent(
+            img_path=img_processed,
+            model_name="Facenet",
+            enforce_detection=False
+        )[0]["embedding"]
+    except:
+        return [], "⚠ No valid face detected in the image."
 
-    if not similarities:
-        return [], "⚠ No matches found in the database"
+    similarities = []
+    for name, db_img, embedding in database:
+        dist = np.linalg.norm(np.array(query_embedding) - np.array(embedding))
+        sim_score = 1 / (1 + dist)
+        similarities.append((sim_score, name, db_img))
 
-    print(f"✅ Found {len(similarities)} matches")
     similarities.sort(reverse=True)
-    top = similarities[:5]
-    gallery = [(img, f"{name} - Similarity: {sim:.2f}") for sim, name, img in top]
-    summary = "\n".join([f"{name} - Similarity: {sim:.2f}" for sim, name, _ in top])
-    return gallery, summary
+    top_matches = similarities[:]
 
-# 🎛️ Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("## 🔍 Facial recognition with DeepFace + ZeroGPU")
-    with gr.Row():
-        image_input = gr.Image(label="📤 Upload an image", type="pil")
-        find_btn = gr.Button("🔎 Find similar faces")
-    gallery = gr.Gallery(label="📸 Similar faces")
-    summary = gr.Textbox(label="🧠 Details", lines=6)
-    find_btn.click(fn=find_similar_faces, inputs=image_input, outputs=[gallery, summary])
+    gallery_items = []
+    text_summary = ""
+    for sim, name, img in top_matches:
+        caption = f"{name} - Similarity: {sim:.2f}"
+        gallery_items.append((img, caption))
+        text_summary += caption + "\n"
 
-    with gr.Row():
-        build_btn = gr.Button("Build embeddings database (uses GPU)")
-        build_btn.click(fn=build_database, inputs=[], outputs=[])
+    return gallery_items, text_summary
 
-demo.launch(share=True)
+# ⚙️ Initialize the database
+database = build_database()
+
+# 🎛️ Gradio interface
+demo = gr.Interface(
+    fn=find_similar_faces,
+    inputs=gr.Image(label="📤 Upload an image", type="pil"),
+    outputs=[
+        gr.Gallery(label="📸 Most similar faces"),
+        gr.Textbox(label="🧠 Similarity", lines=6)
+    ],
+    title="🔍 Face Search with DeepFace",
+    description="Upload an image and it will be compared against the faces in the Hugging Face dataset (`Segizu/dataset_faces`)."
+)
 
+demo.launch()
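
For reference, the rewritten find_similar_faces scores every database entry as 1 / (1 + d), where d is the Euclidean distance between Facenet embeddings, so an identical face scores 1.0 and the score decays toward 0 as distance grows. A minimal sketch of that scoring, using made-up 3-dimensional vectors in place of the 128-dimensional Facenet embeddings (query and db are illustrative names, not part of the app):

import numpy as np

# Illustrative stand-ins; real Facenet embeddings are 128-dimensional.
query = np.array([0.1, 0.3, -0.2])
db = {
    "image_0": np.array([0.1, 0.3, -0.2]),  # identical vector -> similarity 1.00
    "image_1": np.array([0.9, -0.4, 0.5]),  # distant vector   -> lower similarity
}

for name, emb in db.items():
    dist = np.linalg.norm(query - emb)  # Euclidean (L2) distance
    sim = 1 / (1 + dist)                # maps [0, inf) onto (0, 1]
    print(f"{name} - Similarity: {sim:.2f}")
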
metadata.csv DELETED
The diff for this file is too large to render. See raw diff
 
metadata.py DELETED
@@ -1,23 +0,0 @@
1
- from huggingface_hub import HfApi
2
- import csv
3
- import os
4
-
5
- HF_TOKEN = os.getenv("HF_TOKEN") or ""
6
- repo_id = "Segizu/facial-recognition"
7
-
8
- api = HfApi()
9
- files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=HF_TOKEN)
10
-
11
- # Generar URLs completas
12
- base_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/"
13
- image_urls = [base_url + f for f in files if f.lower().endswith(".jpg")]
14
-
15
- # Escribir nuevo metadata.csv
16
- with open("metadata.csv", "w", newline="") as f:
17
- writer = csv.writer(f)
18
- writer.writerow(["image"])
19
- for url in image_urls:
20
- writer.writerow([url])
21
-
22
- print(f"✅ metadata.csv regenerado con URLs absolutas ({len(image_urls)} imágenes)")
23
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-gradio==3.50.2
+gradio
 numpy
 Pillow
 opencv-python-headless
@@ -6,9 +6,6 @@ opencv-python-headless
 # DeepFace from GitHub
 git+https://github.com/serengil/deepface.git
 
-# TensorFlow with GPU support
-tensorflow-gpu==2.15.0
+# Fixes for RetinaFace
+tensorflow==2.12.0
 tf-keras
-spaces
-datasets
-pydantic>=2.0.0,<3.0.0
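
With the GPU-specific pins dropped, a quick way to verify that the pinned TensorFlow still works with DeepFace's RetinaFace backend is a smoke test after pip install -r requirements.txt. This is a hypothetical check, not part of the change; the blank test image simply exercises the detector end to end:

import numpy as np
import tensorflow as tf
from deepface import DeepFace

print(tf.__version__)  # expected: 2.12.0, per the pinned requirement

# Run the RetinaFace detector on a blank image; enforce_detection=False
# keeps DeepFace from raising an error when no face is found.
faces = DeepFace.extract_faces(
    img_path=np.zeros((160, 160, 3), dtype=np.uint8),
    detector_backend="retinaface",
    enforce_detection=False,
)
print(f"extract_faces returned {len(faces)} region(s)")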