Segizu committed
Commit e4617b7 · 1 Parent(s): dba2b8b

metadata v4

Files changed (3)
  1. app.py +27 -12
  2. metadata.csv +0 -0
  3. metadata.py +14 -31
app.py CHANGED
@@ -1,5 +1,5 @@
 import numpy as np
-from PIL import Image
+from PIL import Image, UnidentifiedImageError
 import gradio as gr
 from deepface import DeepFace
 from datasets import load_dataset, DownloadConfig, Image as HfImage
@@ -7,6 +7,8 @@ import os
 import pickle
 from pathlib import Path
 import gc
+import requests
+from io import BytesIO
 
 # 🔑 Configure Hugging Face token
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -38,44 +40,57 @@ def build_database():
         print("📂 Loading embeddings from file...")
         with open(EMBEDDINGS_FILE, 'rb') as f:
             return pickle.load(f)
-
+
     print("🔄 Computing embeddings (this may take a few minutes)...")
     database = []
     batch_size = 10
-
+
     for i in range(0, len(dataset), batch_size):
         batch = dataset[i:i + batch_size]
         print(f"📦 Processing batch {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
-
+
         for j, item in enumerate(batch):
             try:
-                img = item["image"]  # Already a PIL.Image object
+                # Handle local path or remote URL
+                img_data = item["image"]
+                if isinstance(img_data, str):
+                    response = requests.get(img_data)
+                    img = Image.open(BytesIO(response.content))
+                elif isinstance(img_data, dict) and "bytes" in img_data:
+                    img = Image.open(BytesIO(img_data["bytes"]))
+                elif isinstance(img_data, Image.Image):
+                    img = img_data
+                else:
+                    raise ValueError(f"Unsupported image format: {type(img_data)}")
+
                 img_processed = preprocess_image(img)
                 embedding = DeepFace.represent(
                     img_path=img_processed,
                     model_name="Facenet",
                     enforce_detection=False
                 )[0]["embedding"]
-
+
                 database.append((f"image_{i+j}", img, embedding))
                 print(f"✅ Processed image {i+j+1}/{len(dataset)}")
-
-                # Free memory
+
                 del img_processed
                 gc.collect()
-
+
+            except UnidentifiedImageError:
+                print(f"❌ Invalid image at {i+j}: could not be identified")
+                continue
             except Exception as e:
                 print(f"❌ Could not process image {i+j}: {str(e)}")
                 continue
-
+
         # Save progress
         if database:
            print("💾 Saving progress...")
            with open(EMBEDDINGS_FILE, 'wb') as f:
                pickle.dump(database, f)
-
+
         gc.collect()
-
+
     return database
 
 # 🔍 Search for similar faces
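
A note on the new image handling: once metadata.csv points at absolute URLs (see metadata.py below), `item["image"]` can arrive as a URL string or an undecoded `{"bytes": ...}` dict rather than a ready `PIL.Image`, which is what the new branching covers. A minimal standalone sketch of that normalization, factored into a hypothetical `load_image` helper; the `timeout` and `raise_for_status()` calls are additions here, not part of the committed code:

    from io import BytesIO

    import requests
    from PIL import Image

    def load_image(img_data):
        """Normalize a dataset 'image' entry to a PIL.Image."""
        if isinstance(img_data, str):
            # Remote URL from metadata.csv: download, decode in memory
            response = requests.get(img_data, timeout=30)
            response.raise_for_status()
            return Image.open(BytesIO(response.content))
        if isinstance(img_data, dict) and "bytes" in img_data:
            # Undecoded payload, e.g. from datasets' Image(decode=False)
            return Image.open(BytesIO(img_data["bytes"]))
        if isinstance(img_data, Image.Image):
            return img_data
        raise ValueError(f"Unsupported image format: {type(img_data)}")

`PIL.Image.open` raises `UnidentifiedImageError` for any payload it cannot parse (for example an HTML error page served for a dead URL), which is what the new `except UnidentifiedImageError` branch catches.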
metadata.csv ADDED
The diff for this file is too large to render. See raw diff
 
metadata.py CHANGED
@@ -1,40 +1,23 @@
 from huggingface_hub import HfApi
 import csv
 import os
-from pathlib import Path
 
-# 🔐 Secure token configuration
-HF_TOKEN = os.getenv("HF_TOKEN")
-if not HF_TOKEN:
-    raise ValueError("⚠️ Please set the HF_TOKEN environment variable")
+HF_TOKEN = os.getenv("HF_TOKEN") or "hf_token"
+repo_id = "Segizu/facial-recognition"
 
-# 🗂️ Configure the repository
 api = HfApi()
-repo_id = "Segizu/facial-recognition"
+files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=HF_TOKEN)
+
+# Generate full URLs
+base_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/"
+image_urls = [base_url + f for f in files if f.lower().endswith(".jpg")]
 
-try:
-    # List all files in the dataset
-    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=HF_TOKEN)
-
-    # Filter .jpg images
-    image_files = [f for f in files if f.lower().endswith(".jpg")]
-
-    # Save metadata.csv
-    metadata_path = Path("metadata.csv")
-    with open(metadata_path, "w", newline="") as f:
-        writer = csv.writer(f)
-        writer.writerow(["image"])
-        for img in image_files:
-            writer.writerow([img])
-
-    print(f"✅ metadata.csv generated with {len(image_files)} images.")
-
-except Exception as e:
-    print(f"❌ Error: {str(e)}")
-    if "401" in str(e):
-        print("⚠️ Authentication error. Check that your Hugging Face token is valid.")
-    elif "404" in str(e):
-        print("⚠️ Repository not found. Check that the repository name is correct.")
-    else:
-        print("⚠️ An unexpected error occurred.")
+# Write the new metadata.csv
+with open("metadata.csv", "w", newline="") as f:
+    writer = csv.writer(f)
+    writer.writerow(["image"])
+    for url in image_urls:
+        writer.writerow([url])
+
+print(f"✅ metadata.csv regenerated with absolute URLs ({len(image_urls)} images)")
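
With the `image` column now holding absolute `resolve/main` URLs, the regenerated CSV can be read back with the `datasets` imports already present in app.py. A sketch of that consumption, assuming a `datasets` version whose `Image` feature decodes URL strings on access:

    from datasets import load_dataset, Image as HfImage

    # Load the regenerated CSV; "image" is a column of absolute URLs
    dataset = load_dataset("csv", data_files="metadata.csv", split="train")

    # Casting the column to Image makes `datasets` download and decode
    # each URL on access, so dataset[i]["image"] yields a PIL image
    dataset = dataset.cast_column("image", HfImage())

    print(dataset[0]["image"])  # e.g. <PIL.JpegImagePlugin.JpegImageFile ...>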