"""Webcam facial-expression classifier served through a Gradio streaming UI.

Each webcam frame is cropped to the first detected face and classified with
the `dima806/facial_emotions_image_detection` model from the Hugging Face hub.
"""

import gradio as gr
import numpy as np
import torch
from fastai.vision.all import load_learner
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

import face_recognition

# NOTE(review): learn_inf is loaded but never referenced below — presumably a
# leftover from an earlier fastai-based classifier. Kept so the module-level
# side effect (reading export.pkl) is unchanged; confirm whether it is needed.
learn_inf = load_learner("export.pkl")

processor = AutoImageProcessor.from_pretrained("dima806/facial_emotions_image_detection")
model = AutoModelForImageClassification.from_pretrained("dima806/facial_emotions_image_detection")
model.eval()  # inference only — disable dropout / training-mode behavior


def extract_face(image: np.ndarray) -> Image.Image:
    """Crop the first detected face out of an RGB frame.

    Args:
        image: H x W x 3 uint8 RGB frame — the array layout that
            ``face_recognition.face_locations`` expects.

    Returns:
        A PIL image of the first detected face, or the full frame
        converted to PIL when no face is found.
    """
    face_locations = face_recognition.face_locations(image)
    if face_locations:
        # Only the first detection is used; additional faces are ignored.
        top, right, bottom, left = face_locations[0]
        return Image.fromarray(image[top:bottom, left:right])
    # No face: fall back to classifying the whole frame.
    return Image.fromarray(image)


def predict(value: np.ndarray) -> str:
    """Classify the facial expression in one webcam frame.

    Args:
        value: H x W x 3 uint8 RGB frame from the Gradio webcam stream.

    Returns:
        The predicted emotion label from ``model.config.id2label``.
    """
    # BUG FIX: extract_face needs the raw numpy frame. Previously the frame
    # was wrapped with Image.fromarray() first, but both face_locations() and
    # the numpy crop `image[top:bottom, left:right]` fail on a PIL image.
    face = extract_face(value)
    # Grayscale preserves the original preprocessing intent, but the ViT
    # processor normalizes 3 channels, so convert back to RGB afterwards.
    image = face.convert("L").convert("RGB")
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        logits = model(**inputs).logits
    predicted_class_idx = logits.argmax(-1).item()
    return model.config.id2label[predicted_class_idx]


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="Input", sources="webcam")
        with gr.Column():
            output_lbl = gr.Label(value="Output", label="Expression Prediction")
    # Stream webcam frames through predict(); one frame every 100 ms, with a
    # 15 s per-session cap and up to 30 concurrent sessions.
    input_img.stream(
        fn=predict,
        inputs=input_img,
        outputs=output_lbl,
        time_limit=15,
        stream_every=0.1,
        concurrency_limit=30,
    )

if __name__ == "__main__":
    demo.launch()