Spaces:

MatteoFasulo
/

SubjectivityDetection

Sleeping

App Files Files Community

SubjectivityDetection / app.py

MatteoFasulo

Update with examples and cleaner logic

1132a05 verified 25 days ago

raw

history blame

6.41 kB

	import gradio as gr
	import torch
	from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel
	from transformers.models.deberta.modeling_deberta import ContextPooler
	from transformers import pipeline, AutoModelForSequenceClassification
	import torch.nn as nn

	# Hugging Face Hub identifiers used by the app.
	# `model_card` is the base checkpoint whose tokenizer is loaded in analyze().
	model_card = 'microsoft/mdeberta-v3-base'
	# Fine-tuned subjectivity checkpoints: text-only and sentiment-augmented variants.
	subjectivity_only_model = 'MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic'
	sentiment_model = 'MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic'

	# Sample inputs for the Gradio interface; gr.Examples expects one row
	# (a list of input values) per example, hence the single-item lists.
	examples = [[sentence] for sentence in ('Example1', 'Example2', 'Example3')]

	# Custom model class for combining sentiment analysis with subjectivity detection
	class CustomModel(PreTrainedModel):
	    """Subjectivity classifier that appends sentiment scores to the pooled encoder output.

	    The DeBERTa-v2 encoder output is pooled with ``ContextPooler``; the three
	    sentiment scores are concatenated to that vector, and the result goes
	    through dropout and a single linear layer producing ``num_labels`` logits.
	    """

	    config_class = DebertaV2Config

	    def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
	        """Build the encoder, the pooler and the classification head.

	        Args:
	            config: ``DebertaV2Config`` used for both the encoder and the pooler.
	            sentiment_dim: Number of sentiment features appended to the pooled
	                vector. ``forward`` always stacks exactly three scores, so the
	                default of 3 is the only value that matches it.
	            num_labels: Number of output logits.
	            *args, **kwargs: Forwarded unchanged to ``PreTrainedModel.__init__``.
	        """
	        super().__init__(config, *args, **kwargs)
	        self.deberta = DebertaV2Model(config)
	        self.pooler = ContextPooler(config)
	        output_dim = self.pooler.output_dim
	        self.dropout = nn.Dropout(0.1)

	        # The head sees the pooled text vector plus the sentiment scores.
	        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)

	    def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
	        """Return ``{'logits': tensor}`` for a batch of token ids and sentiment scores.

	        ``positive``, ``neutral`` and ``negative`` are stacked along dim 1, so each
	        is expected to be a 1-D tensor with one score per batch item.
	        ``token_type_ids`` and ``labels`` are accepted so tokenizer/trainer output
	        can be splatted into the call, but they are not used here (no loss is
	        computed).
	        """
	        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)

	        encoder_layer = outputs[0]
	        pooled_output = self.pooler(encoder_layer)

	        # Sentiment features as a single tensor
	        sentiment_features = torch.stack((positive, neutral, negative), dim=1)  # Shape: (batch_size, 3)

	        # Combine pooled text embedding with sentiment features
	        combined_features = torch.cat((pooled_output, sentiment_features), dim=1)

	        # Classification head
	        logits = self.classifier(self.dropout(combined_features))

	        return {'logits': logits}

	# Fetch the tokenizer belonging to a checkpoint
	def load_tokenizer(model_name: str):
	    """Return the tokenizer that ``AutoTokenizer`` resolves for ``model_name``."""
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    return tokenizer

	# Load the pre-trained model
	def load_model(model_name: str):
	    """Load a subjectivity classifier with OBJ/SUBJ labels.

	    Checkpoints whose name contains ``'sentiment'`` are loaded as
	    ``CustomModel`` (text + sentiment features); every other name is loaded
	    through ``AutoModelForSequenceClassification``.

	    Args:
	        model_name: Checkpoint identifier passed to ``from_pretrained``.

	    Returns:
	        The loaded model instance.
	    """
	    # Label mapping and output switches shared by both model variants.
	    label_kwargs = dict(
	        num_labels=2,
	        id2label={0: 'OBJ', 1: 'SUBJ'},
	        label2id={'OBJ': 0, 'SUBJ': 1},
	        output_attentions=False,
	        output_hidden_states=False,
	    )

	    if 'sentiment' in model_name:
	        config = DebertaV2Config.from_pretrained(model_name, **label_kwargs)
	        # from_pretrained is a classmethod: call it on the class and hand it the
	        # prepared config. Instantiating CustomModel first would only build a
	        # randomly initialised copy that is thrown away, and the label mapping
	        # in `config` would never reach the loaded model.
	        return CustomModel.from_pretrained(model_name, config=config)

	    return AutoModelForSequenceClassification.from_pretrained(model_name, **label_kwargs)

	# Sentiment pipeline, built on first use and then shared by every call so the
	# checkpoint is not reloaded for each request.
	_SENTIMENT_PIPELINE = None


	def _get_sentiment_pipeline():
	    """Return the shared sentiment-analysis pipeline, creating it on first call."""
	    global _SENTIMENT_PIPELINE
	    if _SENTIMENT_PIPELINE is None:
	        _SENTIMENT_PIPELINE = pipeline(
	            "sentiment-analysis",
	            model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
	            tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
	            top_k=None,  # return a score for every label, not only the best one
	        )
	    return _SENTIMENT_PIPELINE


	# Get sentiment values using a pre-trained sentiment analysis model
	def get_sentiment_values(text: str):
	    """Return a ``{label: score}`` mapping with the sentiment scores of ``text``."""
	    # For a single string the pipeline wraps its per-label results in an outer
	    # list; element 0 is the list of {'label': ..., 'score': ...} dicts.
	    sentiments = _get_sentiment_pipeline()(text)[0]
	    # Read the fields by key instead of relying on dict value order.
	    return {sentiment['label']: sentiment['score'] for sentiment in sentiments}

	# Tokenizer/model instances keyed by (kind, checkpoint name) so that repeated
	# requests reuse them instead of reloading the weights every time.
	_ANALYZE_CACHE = {}


	def _cached(kind, name, loader):
	    """Return ``loader(name)``, memoised under ``(kind, name)``."""
	    key = (kind, name)
	    if key not in _ANALYZE_CACHE:
	        _ANALYZE_CACHE[key] = loader(name)
	    return _ANALYZE_CACHE[key]


	# Run sentiment scoring and both subjectivity models on one piece of text
	def analyze(text):
	    """Score ``text`` with the sentiment pipeline and both subjectivity models.

	    Args:
	        text: The sentence to analyse.

	    Returns:
	        A dict of percentage strings (``'NN.NN%'``) with the keys
	        ``'Positive'``, ``'Neutral'``, ``'Negative'``, ``'Sent-Subj OBJ'``,
	        ``'Sent-Subj SUBJ'``, ``'TextOnly OBJ'`` and ``'TextOnly SUBJ'``.
	    """
	    # Extract sentiment values
	    sentiment_values = get_sentiment_values(text)

	    # Load the tokenizer and models. The locals deliberately do NOT reuse the
	    # module-level name `sentiment_model`: assigning to it here would make it a
	    # local and raise UnboundLocalError when it is read as the checkpoint name.
	    tokenizer = _cached('tokenizer', model_card, load_tokenizer)
	    sent_subj_model = _cached('model', sentiment_model, load_model)
	    text_only_model = _cached('model', subjectivity_only_model, load_model)

	    # Tokenize
	    text_inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')

	    # Get the sentiment values
	    positive = sentiment_values['positive']
	    neutral = sentiment_values['neutral']
	    negative = sentiment_values['negative']

	    # Only the sentiment-augmented model takes the extra score tensors; keep
	    # them in a separate mapping so the text-only model is not handed
	    # keyword arguments its forward() does not accept.
	    sentiment_inputs = dict(text_inputs)
	    sentiment_inputs['positive'] = torch.tensor(positive).unsqueeze(0)
	    sentiment_inputs['neutral'] = torch.tensor(neutral).unsqueeze(0)
	    sentiment_inputs['negative'] = torch.tensor(negative).unsqueeze(0)

	    # Pure inference: no autograd graph is needed.
	    with torch.no_grad():
	        logits1 = sent_subj_model(**sentiment_inputs)['logits']
	        logits2 = text_only_model(**text_inputs)['logits']

	    # Calculate probabilities using softmax; [0] selects the single batch item
	    p1 = torch.nn.functional.softmax(logits1, dim=1)[0]
	    p2 = torch.nn.functional.softmax(logits2, dim=1)[0]

	    # Format the output
	    return {
	        'Positive': f"{positive:.2%}", 'Neutral': f"{neutral:.2%}", 'Negative': f"{negative:.2%}",
	        'Sent-Subj OBJ': f"{p1[0].item():.2%}", 'Sent-Subj SUBJ': f"{p1[1].item():.2%}",
	        'TextOnly OBJ': f"{p2[0].item():.2%}", 'TextOnly SUBJ': f"{p2[1].item():.2%}"
	    }

	# Adapter between analyze() and the two output components of the dashboard
	def _analyze_for_ui(text):
	    """Run ``analyze`` and reshape its result for the chart and the table.

	    ``analyze`` returns a single ``{metric: 'NN.NN%'}`` dict, while the
	    interface below is wired to two outputs. This returns
	    ``(chart_data, table_rows)``: a DataFrame with ``category``/``value``
	    columns for the bar plot and ``[metric, value]`` rows for the table.
	    """
	    # Local import: gr.BarPlot takes a pandas DataFrame as its value, and the
	    # module does not import pandas at the top.
	    import pandas as pd

	    scores = analyze(text)
	    table_rows = [[metric, value] for metric, value in scores.items()]
	    chart_data = pd.DataFrame({
	        "category": list(scores),
	        # Strip the trailing '%' so the plot gets numeric percentages.
	        "value": [float(value.rstrip('%')) for value in scores.values()],
	    })
	    return chart_data, table_rows


	# Build the Gradio dashboard
	with gr.Blocks(theme=gr.themes.Soft(), css="""
	#result_table td { padding: 8px; font-size: 1rem; }
	#header { text-align: center; font-size: 2rem; font-weight: bold; margin-bottom: 10px; }
	""") as demo:
	    gr.Markdown("<div id='header'>🚀 Advanced Subjectivity & Sentiment Dashboard 🚀</div>")
	    with gr.Row():
	        txt = gr.Textbox(label="Enter text to analyze", placeholder="Paste news sentence here...", lines=2)
	        btn = gr.Button("Analyze 🔍", variant="primary")
	    with gr.Tabs():
	        with gr.TabItem("Overview 📊"):
	            chart = gr.BarPlot(x="category", y="value", label="Results", elem_id="result_chart")
	        with gr.TabItem("Raw Scores 📋"):
	            table = gr.Dataframe(headers=["Metric", "Value"], datatype=["str","str"], interactive=False, elem_id="result_table")
	        with gr.TabItem("About ℹ️"):
	            gr.Markdown("This dashboard uses two DeBERTa-based models (with and without sentiment integration) to detect subjectivity, alongside sentiment scores from an XLM-RoBERTa model.")
	    with gr.Row():
	        gr.Markdown("### Examples:")
	        gr.Examples(
	            examples=examples,
	            inputs=txt,
	            # Caching examples requires the function and its outputs, so the
	            # cached results can be computed and shown when one is clicked.
	            fn=_analyze_for_ui,
	            outputs=[chart, table],
	            label="Examples",
	            elem_id="example_list",
	            cache_examples=True,
	        )
	    # Link inputs to outputs
	    btn.click(fn=_analyze_for_ui, inputs=txt, outputs=[chart, table])

	demo.queue().launch(server_name="0.0.0.0", share=True)