Spaces:

MatteoFasulo
/

SubjectivityDetection

Sleeping

App Files Files Community

SubjectivityDetection / app.py

MatteoFasulo

Update with class probabilities of models with and without sentiment

c2ae4ec verified 25 days ago

raw

history blame

5.08 kB

	import gradio as gr
	import torch
	from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel
	from transformers.models.deberta.modeling_deberta import ContextPooler
	from transformers import pipeline
	import torch.nn as nn

	# Model cards and thresholds
	# Hub id of the base checkpoint; load_models() loads the shared tokenizer from it.
	BASE_MODEL = "microsoft/mdeberta-v3-base"
	# Hub id of the checkpoint that load_models() pairs with sentiment_dim=3
	# (subjectivity classifier that also consumes three sentiment scores).
	SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
	# Hub id of the checkpoint that load_models() pairs with sentiment_dim=0
	# (subjectivity classifier that uses the text only).
	SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
	# NOTE(review): not referenced anywhere in the visible part of this file;
	# presumably a decision threshold for the SUBJ class - confirm or remove.
	THRESHOLD = 0.65

	# Custom model for subjectivity (+ optional sentiment features)
	class CustomModel(PreTrainedModel):
	    """DeBERTa-v2 encoder with a linear head for subjectivity classification.

	    The head consumes the pooled encoder output, optionally concatenated with
	    ``sentiment_dim`` extra sentiment features, and produces ``num_labels``
	    logits.
	    """

	    config_class = DebertaV2Config

	    def __init__(self, config, sentiment_dim=0, num_labels=2, *args, **kwargs):
	        """Build the encoder, pooler, dropout and classifier.

	        Args:
	            config: ``DebertaV2Config`` used for the encoder and the pooler.
	            sentiment_dim: Number of extra feature columns appended to the
	                pooled output before classification (0 = text only).
	            num_labels: Number of output logits.
	            *args, **kwargs: Forwarded unchanged to
	                ``PreTrainedModel.__init__``.
	        """
	        super().__init__(config, *args, **kwargs)
	        self.deberta = DebertaV2Model(config)
	        self.pooler = ContextPooler(config)
	        output_dim = self.pooler.output_dim
	        self.dropout = nn.Dropout(0.1)
	        # The classifier width must match what forward() concatenates:
	        # pooled output plus the optional sentiment columns.
	        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)

	    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
	                positive=None, neutral=None, negative=None):
	        """Return classification logits for a batch.

	        Args:
	            input_ids: Token ids passed to the encoder.
	            attention_mask: Optional attention mask passed to the encoder.
	            token_type_ids: Accepted for interface compatibility; it is not
	                forwarded to the encoder.
	            positive, neutral, negative: Optional sentiment scores. They are
	                stacked along dim 1 and concatenated to the pooled output, so
	                each is expected to be a 1-D tensor with one value per batch
	                item. They are used only when all three are given.

	        Returns:
	            The raw (pre-softmax) logits produced by the classifier.
	        """
	        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
	        pooled = self.pooler(outputs[0])
	        if positive is not None and neutral is not None and negative is not None:
	            sent_feats = torch.stack((positive, neutral, negative), dim=1)
	            combined = torch.cat((pooled, sent_feats), dim=1)
	        else:
	            # Text-only path: requires a model built with sentiment_dim=0,
	            # otherwise the classifier input width will not match.
	            combined = pooled
	        logits = self.classifier(self.dropout(combined))
	        return logits

	# Load tokenizer and model helper
	def _load_subjectivity_model(repo_id, sentiment_dim):
	    """Load one CustomModel checkpoint with the OBJ/SUBJ label mapping."""
	    cfg = DebertaV2Config.from_pretrained(
	        repo_id,
	        num_labels=2,
	        id2label={0: 'OBJ', 1: 'SUBJ'},
	        label2id={'OBJ': 0, 'SUBJ': 1},
	        output_attentions=False,
	        output_hidden_states=False,
	    )
	    # from_pretrained is a classmethod: call it on the class and hand it both
	    # the prepared config and sentiment_dim. With an explicit config, extra
	    # keyword arguments are forwarded to CustomModel.__init__, so the
	    # classifier is created with the width stored in the checkpoint. This also
	    # avoids first building a throw-away, randomly initialised model.
	    return CustomModel.from_pretrained(repo_id, config=cfg, sentiment_dim=sentiment_dim)


	def load_models():
	    """Load the shared tokenizer and both subjectivity models.

	    Returns:
	        A ``(tokenizer, model_with_sentiment, model_text_only)`` tuple. The
	        tokenizer comes from ``BASE_MODEL``; the first model is loaded from
	        ``SENT_SUBJ_MODEL`` with three sentiment input features, the second
	        from ``SUBJ_ONLY_MODEL`` with none. Per the transformers
	        documentation, ``from_pretrained`` returns models in evaluation mode.
	    """
	    # Tokenizer shared
	    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
	    # Sentiment+Subjectivity model
	    model1 = _load_subjectivity_model(SENT_SUBJ_MODEL, sentiment_dim=3)
	    # Subjectivity-only model
	    model2 = _load_subjectivity_model(SUBJ_ONLY_MODEL, sentiment_dim=0)
	    return tokenizer, model1, model2

	# Sentiment pipeline
	# The same checkpoint id supplies both the model weights and the tokenizer.
	_SENTIMENT_MODEL = "cardiffnlp/twitter-xlm-roberta-base-sentiment"

	# top_k=None: get_sentiment_scores() iterates over one entry per label, so
	# the pipeline is asked for every label's score rather than only the best one.
	sentiment_pipe = pipeline(
	    "sentiment-analysis",
	    model=_SENTIMENT_MODEL,
	    tokenizer=_SENTIMENT_MODEL,
	    top_k=None,
	)

	def get_sentiment_scores(text: str):
	    """Return the sentiment score of ``text`` for every label.

	    Args:
	        text: The sentence to score with ``sentiment_pipe``.

	    Returns:
	        A dict mapping the lower-cased label name to its score, e.g.
	        ``{'positive': 0.7, 'neutral': 0.2, 'negative': 0.1}``.
	    """
	    # For a single string the pipeline returns a one-element list whose item
	    # is the list of {'label': ..., 'score': ...} dicts (one per label).
	    # Truncate explicitly so that very long inputs do not exceed the
	    # sentiment model's 512-token limit.
	    results = sentiment_pipe(text, truncation=True, max_length=512)[0]
	    # Key by the label *value* (not by the literal dict key 'label');
	    # lower-casing keeps the 'positive'/'neutral'/'negative' lookups done by
	    # the caller independent of the label casing in the model config.
	    return {entry['label'].lower(): entry['score'] for entry in results}

	# Prediction function
	# Caches models on first call
	tokenizer, model_sent_subj, model_subj_only = None, None, None

	def predict_subjectivity(text):
	    """Score one sentence with both subjectivity models and format a report.

	    Args:
	        text: The sentence to analyse. ``None`` or whitespace-only input is
	            rejected with a short message instead of being sent to the models.

	    Returns:
	        A multi-line string with the three sentiment scores followed by the
	        OBJ/SUBJ probabilities of the sentiment-aware model and of the
	        text-only model.
	    """
	    global tokenizer, model_sent_subj, model_subj_only

	    # Nothing meaningful can be predicted for empty input; skip the (slow)
	    # model loading and inference entirely.
	    if text is None or not text.strip():
	        return "Please enter a sentence to analyze."

	    # Lazy one-time load: the first request pays the cost, later ones reuse
	    # the module-level cache.
	    if tokenizer is None:
	        tokenizer, model_sent_subj, model_subj_only = load_models()

	    # Tokenize input
	    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
	    input_ids = inputs['input_ids']
	    attention_mask = inputs.get('attention_mask')

	    # Sentiment scores feed the sentiment-aware model and are also reported.
	    sent_scores = get_sentiment_scores(text)
	    pos, neu, neg = sent_scores['positive'], sent_scores['neutral'], sent_scores['negative']

	    # Pure inference: disable autograd so no graph is built or kept alive.
	    with torch.no_grad():
	        # Sentiment + subjectivity model inference (batch of one, so each
	        # sentiment feature is a 1-element tensor).
	        logits1 = model_sent_subj(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            positive=torch.tensor([pos]),
	            neutral=torch.tensor([neu]),
	            negative=torch.tensor([neg]),
	        )
	        # Subjectivity-only model inference
	        logits2 = model_subj_only(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	        )

	    # Index 0 is OBJ and index 1 is SUBJ (see the label mapping in
	    # load_models); convert to plain floats for formatting.
	    obj1, subj1 = torch.softmax(logits1, dim=1)[0].tolist()
	    obj2, subj2 = torch.softmax(logits2, dim=1)[0].tolist()

	    # Formatting
	    output = [
	        "Sentiment Scores (sent-subj model):",
	        f"- Positive: {pos:.2%}",
	        f"- Neutral: {neu:.2%}",
	        f"- Negative: {neg:.2%}\n",
	        f"Subjectivity (with sentiment) - OBJ: {obj1:.2%}, SUBJ: {subj1:.2%}",
	        f"Subjectivity (text only) - OBJ: {obj2:.2%}, SUBJ: {subj2:.2%}",
	    ]
	    return "\n".join(output)

	# Build Gradio interface
	# Single-line text input for the sentence to analyse.
	_input_box = gr.Textbox(
	    label='Input sentence',
	    placeholder='Enter a sentence from a news article',
	    info='Paste a sentence from a news article to determine subjectivity',
	)

	# Text output that shows the report returned by predict_subjectivity.
	_output_box = gr.Textbox(
	    label='Results',
	    info='Sentiment & dual-model subjectivity probabilities',
	)

	demo = gr.Interface(
	    fn=predict_subjectivity,
	    inputs=_input_box,
	    outputs=_output_box,
	    title='Dual-Model Subjectivity Detection',
	    description='Outputs sentiment scores and class probabilities from two subjectivity models.',
	)

	# Start the web app when the module is executed.
	demo.launch()