import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

# ✅ Paths to your hosted models on Hugging Face Hub
MODEL_PATHS = [
    "Basavians/youtube-comment-sentiment-1",
    "Basavians/youtube-comment-sentiment-2",
    "Basavians/youtube-comment-sentiment-3",
]

# Load models and tokenizers (once at startup)
models = []
tokenizers = []
for path in MODEL_PATHS:
    tokenizer = AutoTokenizer.from_pretrained(path)
    model = AutoModelForSequenceClassification.from_pretrained(path)
    model.eval()
    tokenizers.append(tokenizer)
    models.append(model)

# Class labels (update if different)
LABELS = ["negative", "neutral", "positive"]


def predict_sentiment(text):
    if not text.strip():
        return "Please enter some text", None

    probs = []
    for model, tokenizer in zip(models, tokenizers):
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits
        prob = torch.nn.functional.softmax(logits, dim=-1)
        probs.append(prob.numpy())

    # 🎯 Ensemble by averaging probabilities
    avg_prob = np.mean(probs, axis=0)
    pred_class = int(np.argmax(avg_prob, axis=1)[0])
    pred_label = LABELS[pred_class]
    confidence = float(avg_prob[0][pred_class])

    return pred_label, {label: float(avg_prob[0][i]) for i, label in enumerate(LABELS)}


# Gradio UI
demo = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=4, placeholder="Paste a YouTube comment here..."),
    outputs=[
        gr.Label(num_top_classes=1, label="Predicted Sentiment"),
        gr.Label(label="Confidence Scores"),
    ],
    title="YouTube Comment Sentiment Classifier (Ensemble)",
    description="Enter a comment to see sentiment prediction based on an ensemble of 3 models.",
)

demo.launch()
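
# Example of calling the ensemble directly, without the Gradio UI — a minimal sketch,
# assuming the three model repos above load successfully; the input string is hypothetical:
#
#   label, scores = predict_sentiment("Loved this video, super clear explanation!")
#   print(label, scores)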