Spaces:

MatteoFasulo
/

SubjectivityDetection

Sleeping

App Files Files Community

SubjectivityDetection / app.py

MatteoFasulo

Update app.py

13f2506 verified 25 days ago

raw

history blame

6.47 kB

	import functools

	import gradio as gr
	import pandas as pd
	import torch
	import torch.nn as nn
	from transformers import DebertaV2Model, DebertaV2Config, AutoTokenizer, PreTrainedModel
	from transformers import pipeline, AutoModelForSequenceClassification
	from transformers.models.deberta.modeling_deberta import ContextPooler

	# Hugging Face Hub identifiers: the checkpoint the tokenizer is loaded from,
	# followed by the two fine-tuned subjectivity classifiers.
	model_card = "microsoft/mdeberta-v3-base"
	subjectivity_only_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
	sentiment_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"

	# Inputs offered by the Gradio "Examples" widget: one single-element row per example.
	examples = [[sample_text] for sample_text in ('Example1', 'Example2', 'Example3')]

	# Custom model class for combining sentiment analysis with subjectivity detection
	class CustomModel(PreTrainedModel):
	    """DeBERTa-v2 encoder whose pooled output is concatenated with sentiment scores.

	    The encoder output is pooled with ``ContextPooler``, joined with the three
	    sentiment values, passed through dropout, and projected by a single linear
	    layer to ``num_labels`` logits.
	    """

	    config_class = DebertaV2Config

	    def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
	        """Build the encoder, pooler and classification head.

	        Args:
	            config: Configuration passed to ``PreTrainedModel`` and to the
	                ``DebertaV2Model`` / ``ContextPooler`` constructors.
	            sentiment_dim: Number of sentiment features appended to the pooled
	                encoder output (``forward`` stacks exactly three).
	            num_labels: Output size of the classification layer.
	            *args, **kwargs: Forwarded unchanged to ``PreTrainedModel.__init__``.
	        """
	        super().__init__(config, *args, **kwargs)
	        self.deberta = DebertaV2Model(config)
	        self.pooler = ContextPooler(config)
	        output_dim = self.pooler.output_dim
	        self.dropout = nn.Dropout(0.1)

	        # The head sees the pooled text representation plus the sentiment scores.
	        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)

	    def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
	        """Return ``{'logits': ...}`` for the given token ids and sentiment scores.

	        ``token_type_ids`` and ``labels`` are accepted so tokenizer/trainer
	        output can be unpacked into the call, but neither is used here.
	        ``positive``, ``neutral`` and ``negative`` are stacked along ``dim=1``,
	        so each is presumably a 1-D tensor of length batch_size.
	        """
	        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)

	        encoder_layer = outputs[0]
	        pooled_output = self.pooler(encoder_layer)

	        # Sentiment features as a single tensor
	        sentiment_features = torch.stack((positive, neutral, negative), dim=1)  # Shape: (batch_size, 3)

	        # Combine the pooled embedding with the sentiment features
	        combined_features = torch.cat((pooled_output, sentiment_features), dim=1)

	        # Classification head
	        logits = self.classifier(self.dropout(combined_features))

	        return {'logits': logits}

	# Load the pre-trained tokenizer
	@functools.lru_cache(maxsize=None)
	def load_tokenizer(model_name: str):
	    """Return the tokenizer for ``model_name``.

	    The result is memoized per model name, so repeated calls do not
	    re-instantiate the tokenizer.
	    """
	    return AutoTokenizer.from_pretrained(model_name)

	# Load the pre-trained model
	@functools.lru_cache(maxsize=None)
	def load_model(model_name: str):
	    """Return the classifier stored under ``model_name``.

	    Names containing ``'sentiment'`` are loaded as ``CustomModel`` (text plus
	    sentiment features); every other name is loaded through
	    ``AutoModelForSequenceClassification``. Both use the OBJ/SUBJ label mapping.
	    The result is memoized per model name, so each checkpoint is loaded only
	    once per process.
	    """
	    if 'sentiment' in model_name:
	        config = DebertaV2Config.from_pretrained(
	            model_name,
	            num_labels=2,
	            id2label={0: 'OBJ', 1: 'SUBJ'},
	            label2id={'OBJ': 0, 'SUBJ': 1},
	            output_attentions=False,
	            output_hidden_states=False,
	        )

	        # from_pretrained is a classmethod: call it on the class and hand it the
	        # prepared config, rather than building a throwaway instance first and
	        # losing the config in the process.
	        return CustomModel.from_pretrained(model_name, config=config)

	    return AutoModelForSequenceClassification.from_pretrained(
	        model_name,
	        num_labels=2,
	        id2label={0: 'OBJ', 1: 'SUBJ'},
	        label2id={'OBJ': 0, 'SUBJ': 1},
	        output_attentions=False,
	        output_hidden_states=False,
	    )

	# Build the sentiment-analysis pipeline once and reuse it across requests
	@functools.lru_cache(maxsize=1)
	def _sentiment_pipeline():
	    """Return the shared pipeline; ``top_k=None`` makes it report every label."""
	    return pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment", tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment", top_k=None)

	# Get sentiment values using a pre-trained sentiment analysis model
	def get_sentiment_values(text: str):
	    """Return a ``{label: score}`` dict of sentiment scores for ``text``."""
	    # The first element of the pipeline result holds the per-label
	    # {'label': ..., 'score': ...} entries for the single input string.
	    sentiments = _sentiment_pipeline()(text)[0]
	    # Read the documented keys instead of relying on dict value order.
	    return {entry['label']: entry['score'] for entry in sentiments}

	# Score one piece of text with the sentiment model and both subjectivity models
	def analyze(text):
	    """Return formatted sentiment and subjectivity percentages for ``text``.

	    Returns:
	        dict mapping metric name to a percentage string (``"{:.2%}"``):
	        the three sentiment scores, then OBJ/SUBJ probabilities from the
	        sentiment-augmented model and from the text-only model.
	    """
	    # Extract sentiment values
	    sentiment_values = get_sentiment_values(text)

	    # Load the tokenizer and models. The locals deliberately do not reuse the
	    # module-level name `sentiment_model`: assigning to it inside this function
	    # would make it local and raise UnboundLocalError when it is read.
	    tokenizer = load_tokenizer(model_card)
	    sent_subj_model = load_model(sentiment_model)
	    text_only_model = load_model(subjectivity_only_model)

	    # Tokenize
	    text_inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')

	    # Get the sentiment values
	    positive = sentiment_values['positive']
	    neutral = sentiment_values['neutral']
	    negative = sentiment_values['negative']

	    # Only the sentiment-augmented model accepts the extra sentiment tensors, so
	    # they go into a separate copy and the text-only model gets the plain
	    # tokenizer output.
	    sentiment_inputs = dict(text_inputs)
	    sentiment_inputs['positive'] = torch.tensor(positive).unsqueeze(0)
	    sentiment_inputs['neutral'] = torch.tensor(neutral).unsqueeze(0)
	    sentiment_inputs['negative'] = torch.tensor(negative).unsqueeze(0)

	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        logits1 = sent_subj_model(**sentiment_inputs)['logits']
	        logits2 = text_only_model(**text_inputs)['logits']

	    # Calculate probabilities using softmax; [0] selects the single input row
	    p1 = torch.nn.functional.softmax(logits1, dim=1)[0]
	    p2 = torch.nn.functional.softmax(logits2, dim=1)[0]

	    # Format the output
	    return {
	        'Positive': f"{positive:.2%}", 'Neutral': f"{neutral:.2%}", 'Negative': f"{negative:.2%}",
	        'Sent-Subj OBJ': f"{p1[0]:.2%}", 'Sent-Subj SUBJ': f"{p1[1]:.2%}",
	        'TextOnly OBJ': f"{p2[0]:.2%}", 'TextOnly SUBJ': f"{p2[1]:.2%}"
	    }

	# Adapt analyze()'s single dict to the two output components wired below
	def _render_results(text):
	    """Return ``(chart_data, table_rows)`` for the bar plot and the score table.

	    ``analyze`` yields ``{metric: "NN.NN%"}``. The table shows those strings
	    as-is; the bar plot needs numbers, so the trailing ``%`` is stripped and
	    the value is plotted in percent.
	    """
	    scores = analyze(text)
	    table_rows = [[metric, value] for metric, value in scores.items()]
	    chart_data = pd.DataFrame({
	        "category": list(scores),
	        "value": [float(value.rstrip("%")) for value in scores.values()],
	    })
	    return chart_data, table_rows

	# Build the Gradio interface
	with gr.Blocks(theme=gr.themes.Soft(), css="""
	#result_table td { padding: 8px; font-size: 1rem; }
	#header { text-align: center; font-size: 2rem; font-weight: bold; margin-bottom: 10px; }
	""") as demo:
	    gr.Markdown("<div id='header'>🚀 Advanced Subjectivity & Sentiment Dashboard 🚀</div>")
	    with gr.Row():
	        txt = gr.Textbox(label="Enter text to analyze", placeholder="Paste news sentence here...", lines=2)
	        btn = gr.Button("Analyze 🔍", variant="primary")
	    with gr.Tabs():
	        with gr.TabItem("Overview 📊"):
	            chart = gr.BarPlot(x="category", y="value", label="Results", elem_id="result_chart")
	        with gr.TabItem("Raw Scores 📋"):
	            table = gr.Dataframe(headers=["Metric", "Value"], datatype=["str","str"], interactive=False, elem_id="result_table")
	        with gr.TabItem("About ℹ️"):
	            gr.Markdown("This dashboard uses two DeBERTa-based models (with and without sentiment integration) to detect subjectivity, alongside sentiment scores from an XLM-RoBERTa model.")
	    with gr.Row():
	        gr.Markdown("### Examples:")
	        gr.Examples(
	            examples=examples,
	            inputs=txt,
	            outputs=[chart, table],
	            fn=_render_results,
	            label="Examples",
	            elem_id="example_list",
	            cache_examples=True,
	        )
	    # Link inputs to outputs: one return value per output component
	    btn.click(fn=_render_results, inputs=txt, outputs=[chart, table])

	demo.queue().launch(server_name="0.0.0.0", share=True)