# Scraped Hugging Face file-viewer header, kept as comments so the file parses:
# Last commit by MatteoFasulo: "Update with class probabilities of models with and without sentiment"
# Commit c2ae4ec (verified)
# File size: 5.08 kB
import gradio as gr
import torch
from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel
from transformers.models.deberta.modeling_deberta import ContextPooler
from transformers import pipeline
import torch.nn as nn
# Hugging Face Hub identifiers of the checkpoints used by this app, plus a threshold.
# BASE_MODEL supplies the tokenizer that load_models() shares between both classifiers.
BASE_MODEL = "microsoft/mdeberta-v3-base"
# Subjectivity classifier that load_models() builds with sentiment_dim=3 (text + sentiment scores).
SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
# Subjectivity classifier that load_models() builds with sentiment_dim=0 (text only).
SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
# NOTE(review): THRESHOLD is not referenced anywhere in the visible code; presumably a
# SUBJ decision cut-off left over from an earlier version — confirm or remove.
THRESHOLD = 0.65
# Subjectivity classifier: DeBERTa-v2 encoder with an optional sentiment side-input
class CustomModel(PreTrainedModel):
    """Sequence classifier built from a DeBERTa-v2 encoder and a context pooler.

    The pooled encoder output is passed through dropout and a linear layer.
    When all three sentiment tensors are supplied to ``forward`` they are
    stacked and concatenated to the pooled output before classification, so
    the linear layer is sized ``pooler.output_dim + sentiment_dim``.

    Args:
        config: ``DebertaV2Config`` used to build the encoder and the pooler.
        sentiment_dim: Number of extra sentiment features appended to the
            pooled output (0 disables the side-input).
        num_labels: Number of output classes.
    """

    config_class = DebertaV2Config

    def __init__(self, config, sentiment_dim=0, num_labels=2, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.deberta = DebertaV2Model(config)
        self.pooler = ContextPooler(config)
        self.dropout = nn.Dropout(0.1)
        # The classifier input grows by `sentiment_dim` to make room for the side-input.
        classifier_in = self.pooler.output_dim + sentiment_dim
        self.classifier = nn.Linear(classifier_in, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
                positive=None, neutral=None, negative=None):
        """Return the raw classification logits for a batch.

        ``token_type_ids`` is accepted but not forwarded to the encoder.
        ``positive``/``neutral``/``negative`` are presumably 1-D tensors with
        one score per batch item (TODO confirm); they are only used when all
        three are given.
        """
        encoder_out = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        features = self.pooler(encoder_out[0])

        sentiment = (positive, neutral, negative)
        if all(score is not None for score in sentiment):
            # Stack the three scores along dim 1, then append them to the pooled features.
            features = torch.cat((features, torch.stack(sentiment, dim=1)), dim=1)

        return self.classifier(self.dropout(features))
# Load tokenizer and model helper
def _load_subjectivity_model(repo_id, sentiment_dim):
    """Load the ``CustomModel`` checkpoint at *repo_id* with the given sentiment width."""
    cfg = DebertaV2Config.from_pretrained(
        repo_id,
        num_labels=2,
        id2label={0: 'OBJ', 1: 'SUBJ'},
        label2id={'OBJ': 0, 'SUBJ': 1},
        output_attentions=False,
        output_hidden_states=False,
    )
    # `from_pretrained` is a classmethod that constructs the model itself, so the
    # config and `sentiment_dim` must be handed to it directly. Calling it on an
    # already-built instance would throw that instance away and rebuild the model
    # with the default `sentiment_dim=0`.
    return CustomModel.from_pretrained(repo_id, config=cfg, sentiment_dim=sentiment_dim)


def load_models():
    """Load the shared tokenizer and both subjectivity classifiers.

    Returns:
        A ``(tokenizer, model1, model2)`` tuple where ``tokenizer`` comes from
        ``BASE_MODEL``, ``model1`` is the ``SENT_SUBJ_MODEL`` checkpoint built
        with three sentiment features, and ``model2`` is the ``SUBJ_ONLY_MODEL``
        checkpoint built without sentiment features.
    """
    # Tokenizer shared by both classifiers
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    # Sentiment+Subjectivity model
    model1 = _load_subjectivity_model(SENT_SUBJ_MODEL, sentiment_dim=3)
    # Subjectivity-only model
    model2 = _load_subjectivity_model(SUBJ_ONLY_MODEL, sentiment_dim=0)
    return tokenizer, model1, model2
# Sentiment pipeline: the same Hub checkpoint provides both the model and its tokenizer.
_SENTIMENT_CHECKPOINT = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
    top_k=None,  # passed through unchanged; the caller reads a score for every sentiment label
)
def get_sentiment_scores(text: str) -> dict:
    """Return the sentiment distribution of *text* as a ``{label: score}`` dict.

    Runs ``sentiment_pipe`` on the text and maps each entry's ``'label'``
    (lower-cased) to its ``'score'``.
    """
    results = sentiment_pipe(text)
    # NOTE(review): for a single string the pipeline output is either a flat list
    # of label dicts or that list wrapped in an outer list, depending on the
    # transformers version — unwrap only when the outer list is present.
    if results and isinstance(results[0], list):
        results = results[0]
    # Each entry looks like {'label': ..., 'score': ...}; key the result by the
    # label text itself. Lower-casing guards against checkpoints whose config
    # capitalises the label names, since callers look up 'positive' etc.
    return {entry['label'].lower(): entry['score'] for entry in results}
# Prediction function
# The tokenizer and both classifiers are loaded on the first call and reused afterwards.
tokenizer, model_sent_subj, model_subj_only = None, None, None


def predict_subjectivity(text):
    """Classify *text* with both subjectivity models and format a text report.

    Args:
        text: The sentence to analyse.

    Returns:
        A multi-line string listing the three sentiment scores followed by the
        OBJ/SUBJ probabilities of the sentiment-aware model and of the
        text-only model.
    """
    global tokenizer, model_sent_subj, model_subj_only
    if tokenizer is None:
        tokenizer, model_sent_subj, model_subj_only = load_models()

    # Tokenize input (shared by both classifiers)
    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
    input_ids = inputs['input_ids']
    attention_mask = inputs.get('attention_mask')

    # Sentiment scores feed the sentiment-aware classifier and the report
    sent_scores = get_sentiment_scores(text)
    pos, neu, neg = sent_scores['positive'], sent_scores['neutral'], sent_scores['negative']

    # Inference only: skip autograd bookkeeping so no graph is kept alive per request.
    with torch.no_grad():
        # Sentiment + subjectivity model inference
        logits1 = model_sent_subj(
            input_ids=input_ids,
            attention_mask=attention_mask,
            positive=torch.tensor([pos]),
            neutral=torch.tensor([neu]),
            negative=torch.tensor([neg]),
        )
        # Subjectivity-only model inference
        logits2 = model_subj_only(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )

    # A single sentence is scored, so take row 0 of each probability matrix.
    probs1 = torch.softmax(logits1, dim=1)[0]
    probs2 = torch.softmax(logits2, dim=1)[0]

    # Formatting
    output = [
        "Sentiment Scores (sent-subj model):",
        f"- Positive: {pos:.2%}",
        f"- Neutral: {neu:.2%}",
        f"- Negative: {neg:.2%}\n",
        f"Subjectivity (with sentiment) - OBJ: {probs1[0]:.2%}, SUBJ: {probs1[1]:.2%}",
        f"Subjectivity (text only) - OBJ: {probs2[0]:.2%}, SUBJ: {probs2[1]:.2%}",
    ]
    return "\n".join(output)
# Gradio UI: one text input, one text output, wired to predict_subjectivity.
sentence_box = gr.Textbox(
    label='Input sentence',
    placeholder='Enter a sentence from a news article',
    info='Paste a sentence from a news article to determine subjectivity',
)
results_box = gr.Textbox(
    label='Results',
    info='Sentiment & dual-model subjectivity probabilities',
)
demo = gr.Interface(
    fn=predict_subjectivity,
    inputs=sentence_box,
    outputs=results_box,
    title='Dual-Model Subjectivity Detection',
    description='Outputs sentiment scores and class probabilities from two subjectivity models.',
)
# Start the web app as soon as the module is executed.
demo.launch()