# face-shape / app.py
# ruminasval's picture
# Update app.py
# b908b38 verified
import streamlit as st
import torch
import cv2
import mediapipe as mp
from transformers import SwinForImageClassification, AutoFeatureExtractor
from PIL import Image
import numpy as np
# Module-level MediaPipe face detector, used by preprocess_image to locate
# the face before cropping.
# NOTE(review): model_selection=1 presumably selects MediaPipe's full-range
# detection model -- confirm against the MediaPipe documentation.
mp_face_detection = mp.solutions.face_detection.FaceDetection(
    min_detection_confidence=0.5,
    model_selection=1,
)
# Build the classifier, its feature extractor and the label map.
@st.cache_resource
def load_model():
    """Load the Swin classifier, its feature extractor and the label map.

    The model and feature extractor are created from the
    ``microsoft/swin-tiny-patch4-window7-224`` checkpoint. The model's
    weights are then replaced with the state dict stored in the local
    ``swin.pth`` file (mapped to CPU) and the model is switched to
    evaluation mode.

    Returns:
        A ``(model, feature_extractor, id2label)`` tuple, where
        ``id2label`` maps the class indices 0-4 to face-shape names.
    """
    checkpoint = "microsoft/swin-tiny-patch4-window7-224"
    id2label = {0: 'Heart', 1: 'Oblong', 2: 'Oval', 3: 'Round', 4: 'Square'}
    label2id = {name: index for index, name in id2label.items()}

    # ignore_mismatched_sizes is presumably needed because the base
    # checkpoint's classification head does not have five outputs.
    classifier = SwinForImageClassification.from_pretrained(
        checkpoint,
        label2id=label2id,
        id2label=id2label,
        ignore_mismatched_sizes=True,
    )

    # NOTE(review): torch.load may unpickle arbitrary objects depending on
    # the installed torch version -- only ship a trusted swin.pth.
    state_dict = torch.load('swin.pth', map_location='cpu')
    classifier.load_state_dict(state_dict)
    classifier.eval()

    extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
    return classifier, extractor, id2label
# Load the model, feature extractor and label map once at module level.
# load_model is decorated with st.cache_resource, and the three names bound
# here are read as globals by preprocess_image and predict_face_shape.
model, feature_extractor, id2label = load_model() # id2label: class index -> face-shape name
# Recommended glasses frame for each face-shape label. The values are the
# user-facing strings shown in the UI and must be kept verbatim.
glasses_recommendations = dict(
    Heart="Rimless (tanpa bingkai bawah)",
    Oblong="Kotak",
    Oval="Berbagai bentuk bingkai",
    Round="Kotak",
    Square="Oval atau bundar",
)
def preprocess_image(image):
    """Crop the first detected face and turn it into model input.

    Args:
        image: Image array in BGR channel order; it is converted with
            ``cv2.COLOR_BGR2RGB`` both for detection and before feature
            extraction.

    Returns:
        The ``pixel_values`` entry produced by the feature extractor,
        requested with ``return_tensors="pt"``.

    Raises:
        ValueError: If no face is detected, or if the clamped face box is
            narrower or shorter than 10 pixels.
    """
    detection_result = mp_face_detection.process(
        cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    )
    if not detection_result.detections:
        raise ValueError("No face detected")

    # Only the first detection is used.
    box = detection_result.detections[0].location_data.relative_bounding_box
    height, width, _ = image.shape

    # The box is expressed relative to the image size: scale it to pixels
    # and clamp it to the image bounds.
    left = max(0, int(box.xmin * width))
    top = max(0, int(box.ymin * height))
    right = min(width, int((box.xmin + box.width) * width))
    bottom = min(height, int((box.ymin + box.height) * height))

    # Rejects empty, inverted and tiny crops alike: any side shorter than
    # 10 pixels (including zero or negative extents) is invalid.
    if min(right - left, bottom - top) < 10:
        raise ValueError("Invalid face crop dimensions")

    face_bgr = cv2.resize(image[top:bottom, left:right], (224, 224))
    face_rgb = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB)

    # The feature extractor is fed a PIL image.
    model_inputs = feature_extractor(
        images=Image.fromarray(face_rgb), return_tensors="pt"
    )
    return model_inputs['pixel_values']
def predict_face_shape(image):
    """Return the predicted face-shape label for an image.

    Args:
        image: Image array passed straight to ``preprocess_image``.

    Returns:
        The ``id2label`` entry for the class with the highest logit.

    Raises:
        ValueError: Propagated from ``preprocess_image`` when no usable
            face crop can be produced.
    """
    # Inference is pinned to the CPU (Hugging Face Spaces deployment).
    cpu = torch.device("cpu")
    pixel_values = preprocess_image(image).to(cpu)

    # No gradients are needed for a forward-only pass.
    with torch.no_grad():
        logits = model(pixel_values).logits

    class_index = logits.argmax(dim=1).item()
    return id2label[class_index]
# Streamlit UI: upload a photo, predict the face shape and show the matching
# glasses recommendation.
st.title("Face Shape & Glasses Recommender")
st.write("Upload a face photo for shape analysis and glasses recommendations")
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    # Decoding happens inside the try block as well: the uploader only
    # filters by file extension, so a corrupt or mislabelled file makes
    # Image.open raise. Handling it here reports it through the same
    # st.error path as detection failures instead of an unhandled exception.
    try:
        image = Image.open(uploaded_file).convert('RGB')
        img_array = np.array(image)
        # NOTE(review): newer Streamlit releases reportedly deprecate
        # use_column_width -- confirm before upgrading Streamlit.
        st.image(image, caption='Uploaded Image', use_column_width=True)
        with st.spinner('Analyzing...'):
            # predict_face_shape expects OpenCV's BGR channel order, while
            # the PIL-derived array is RGB.
            prediction = predict_face_shape(cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))
        recommendation = glasses_recommendations[prediction]
        st.success(f"Predicted Face Shape: **{prediction}**")
        st.info(f"Recommended Glasses Frame: **{recommendation}**")
    except Exception as e:
        # Top-level UI boundary: show the failure to the user rather than
        # aborting the script run.
        st.error(f"Error: {str(e)}")