# File size: 5,084 Bytes (file-viewer header; the commit-hash and line-number gutters that followed it were page residue, not part of the source)
import gradio as gr
import torch
from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel
from transformers.models.deberta.modeling_deberta import ContextPooler
from transformers import pipeline
import torch.nn as nn
# Model cards and thresholds
# Hub id of the base checkpoint; load_models() takes the shared tokenizer from it.
BASE_MODEL = "microsoft/mdeberta-v3-base"
# Hub id of the subjectivity checkpoint that load_models() builds with sentiment_dim=3.
SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
# Hub id of the subjectivity checkpoint that load_models() builds with sentiment_dim=0.
SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
# NOTE(review): not referenced anywhere in the visible code; presumably a SUBJ
# decision threshold -- confirm its purpose or remove it.
THRESHOLD = 0.65
# Custom model for subjectivity (+ optional sentiment features)
class CustomModel(PreTrainedModel):
    """DeBERTa-v2 encoder followed by a pooled linear classification head.

    The head reads the pooled encoder output. When all three sentiment scores
    are passed to ``forward`` they are appended to the pooled vector first, so
    ``sentiment_dim`` must match the number of appended columns.
    """

    config_class = DebertaV2Config

    def __init__(self, config, sentiment_dim=0, num_labels=2, *args, **kwargs):
        """Create the encoder, pooler, dropout and classifier layers.

        Args:
            config: Configuration handed to the encoder and the pooler.
            sentiment_dim: Number of extra feature columns the classifier
                expects after the pooled vector.
            num_labels: Width of the classifier output.
            *args, **kwargs: Forwarded unchanged to ``PreTrainedModel``.
        """
        super().__init__(config, *args, **kwargs)
        self.deberta = DebertaV2Model(config)
        self.pooler = ContextPooler(config)
        self.dropout = nn.Dropout(0.1)
        classifier_in = self.pooler.output_dim + sentiment_dim
        self.classifier = nn.Linear(classifier_in, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
                positive=None, neutral=None, negative=None):
        """Return classification logits for a batch.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional mask passed to the encoder.
            token_type_ids: Accepted for call compatibility; it is not
                forwarded to the encoder.
            positive, neutral, negative: Optional sentiment scores. They are
                used only when all three are given.
                # assumes each is a 1-D tensor of batch length -- TODO confirm

        Returns:
            The raw output of the linear classifier (no softmax applied).
        """
        encoded = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        features = self.pooler(encoded[0])
        sentiment = (positive, neutral, negative)
        if all(score is not None for score in sentiment):
            # Stack the three scores as columns and append them to the pooled vector.
            sentiment_columns = torch.stack(sentiment, dim=1)
            features = torch.cat((features, sentiment_columns), dim=1)
        return self.classifier(self.dropout(features))
# Load tokenizer and model helper
def _load_subjectivity_model(repo_id, sentiment_dim):
    """Load one fine-tuned ``CustomModel`` checkpoint from ``repo_id``.

    Args:
        repo_id: Hub id of the checkpoint to load.
        sentiment_dim: Number of sentiment feature columns the checkpoint's
            classifier expects after the pooled vector.

    Returns:
        The ``CustomModel`` instance produced by ``from_pretrained``.
    """
    cfg = DebertaV2Config.from_pretrained(
        repo_id,
        num_labels=2,
        id2label={0: 'OBJ', 1: 'SUBJ'},
        label2id={'OBJ': 0, 'SUBJ': 1},
        output_attentions=False,
        output_hidden_states=False,
    )
    # from_pretrained is a classmethod that constructs a *new* model. Calling
    # it on an existing instance discards that instance, so both the config and
    # sentiment_dim must be passed here; otherwise the classifier is rebuilt
    # with the default sentiment_dim=0 and no longer matches the checkpoint.
    return CustomModel.from_pretrained(repo_id, config=cfg, sentiment_dim=sentiment_dim)


def load_models():
    """Load the shared tokenizer and both subjectivity classifiers.

    Returns:
        A ``(tokenizer, sent_subj_model, subj_only_model)`` tuple. The tokenizer
        comes from ``BASE_MODEL``. The first model is ``SENT_SUBJ_MODEL`` built
        with three sentiment feature columns; the second is ``SUBJ_ONLY_MODEL``
        built with none.
    """
    # One tokenizer is shared by both classifiers.
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    model1 = _load_subjectivity_model(SENT_SUBJ_MODEL, sentiment_dim=3)
    model2 = _load_subjectivity_model(SUBJ_ONLY_MODEL, sentiment_dim=0)
    return tokenizer, model1, model2
# Sentiment pipeline
# The same checkpoint id supplies both the sentiment model and its tokenizer.
_SENTIMENT_CHECKPOINT = "cardiffnlp/twitter-xlm-roberta-base-sentiment"

# Module-level sentiment classifier, created once at import time.
# top_k=None is kept as in the original configuration; the prediction code
# reads the positive, neutral and negative scores from its output.
sentiment_pipe = pipeline(
    task="sentiment-analysis",
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
    top_k=None,
)
def get_sentiment_scores(text: str):
    """Return the sentiment score of every label for ``text``.

    Args:
        text: The sentence to classify with ``sentiment_pipe``.

    Returns:
        A dict mapping each lower-cased label name to its score, e.g.
        ``{'positive': 0.7, 'neutral': 0.2, 'negative': 0.1}``.
    """
    results = sentiment_pipe(text)
    # A single string may come back as [[{...}, ...]] or as [{...}, ...]
    # depending on the pipeline version/call path; unwrap the outer list.
    if results and isinstance(results[0], list):
        results = results[0]
    # Each entry looks like {'label': ..., 'score': ...}. Key by the label
    # *value*; the first key/value of the dict would give ('label', '<name>').
    # Labels are lower-cased so lookups like scores['positive'] work
    # regardless of how the checkpoint capitalises its label names.
    return {entry['label'].lower(): entry['score'] for entry in results}
# Prediction function
# Caches models on first call
# Lazily filled cache: populated by the first predict_subjectivity() call.
tokenizer, model_sent_subj, model_subj_only = None, None, None


def predict_subjectivity(text):
    """Score one sentence with both subjectivity models and format a report.

    Args:
        text: The sentence to analyse.

    Returns:
        A multi-line string holding the three sentiment scores followed by the
        OBJ/SUBJ probabilities of the sentiment-aware model and of the
        text-only model. For empty or whitespace-only input a short prompt is
        returned instead and no model is loaded or run.

    Raises:
        KeyError: If the sentiment scores lack a 'positive', 'neutral' or
            'negative' entry.
    """
    global tokenizer, model_sent_subj, model_subj_only

    # Nothing meaningful can be predicted for blank input; skip the model work.
    if not text or not text.strip():
        return "Please enter a sentence to analyze."

    if tokenizer is None:
        tokenizer, model_sent_subj, model_subj_only = load_models()

    # Tokenize input
    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')

    sent_scores = get_sentiment_scores(text)
    pos, neu, neg = sent_scores['positive'], sent_scores['neutral'], sent_scores['negative']

    # Inference only: no_grad avoids building an autograd graph per request.
    with torch.no_grad():
        # Sentiment + subjectivity model inference
        logits1 = model_sent_subj(
            input_ids=inputs['input_ids'],
            attention_mask=inputs.get('attention_mask'),
            positive=torch.tensor([pos]),
            neutral=torch.tensor([neu]),
            negative=torch.tensor([neg]),
        )
        # Subjectivity-only model inference
        logits2 = model_subj_only(
            input_ids=inputs['input_ids'],
            attention_mask=inputs.get('attention_mask'),
        )

    # Index 0 selects the single sentence in the batch; tolist() yields plain floats.
    probs1 = torch.softmax(logits1, dim=1)[0].tolist()
    probs2 = torch.softmax(logits2, dim=1)[0].tolist()

    # Formatting
    output = [
        "Sentiment Scores (sent-subj model):",
        f"- Positive: {pos:.2%}",
        f"- Neutral: {neu:.2%}",
        f"- Negative: {neg:.2%}\n",
        f"Subjectivity (with sentiment) - OBJ: {probs1[0]:.2%}, SUBJ: {probs1[1]:.2%}",
        f"Subjectivity (text only) - OBJ: {probs2[0]:.2%}, SUBJ: {probs2[1]:.2%}",
    ]
    return "\n".join(output)
# Build Gradio interface
# Single-textbox UI: the submitted sentence goes to predict_subjectivity and
# the formatted report it returns is shown in the results textbox.
demo = gr.Interface(
    fn=predict_subjectivity,
    inputs=gr.Textbox(
        label='Input sentence',
        placeholder='Enter a sentence from a news article',
        info='Paste a sentence from a news article to determine subjectivity',
    ),
    outputs=gr.Textbox(
        label='Results',
        info='Sentiment & dual-model subjectivity probabilities',
    ),
    title='Dual-Model Subjectivity Detection',
    description='Outputs sentiment scores and class probabilities from two subjectivity models.',
)

# Start the web app (the stray trailing "|" after this call was a syntax error).
demo.launch()