import gradio as gr
import torch
import torch.nn as nn
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DebertaV2Config,
    DebertaV2Model,
    PreTrainedModel,
    pipeline,
)
from transformers.models.deberta_v2.modeling_deberta_v2 import ContextPooler

# Model identifiers: the shared tokenizer base and the two fine-tuned subjectivity classifiers
model_card = "microsoft/mdeberta-v3-base"
subjectivity_only_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
sentiment_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
# Example inputs for the Gradio interface (outputs are pre-computed via cache_examples=True)
examples = [
    ["But then Trump came to power and sidelined the defense hawks, ushering in a dramatic shift in Republican sentiment toward America's allies and adversaries."],
    ["Boxing Day ambush & flagship attack Putin has long tried to downplay the true losses his army has faced in the Black Sea."],
    # Italian: "I heard that the Italian PM confessed he will eat spaghetti tonight"
    ["Ho sentito dire che il PM italiano ha confessato che mangerà spaghetti stasera"],
    # Italian: "I am angry, and I heard that the Italian PM confessed he will eat spaghetti tonight"
    ["Sono arrabbiato e ho sentito dire che il PM italiano ha confessato che mangerà spaghetti stasera"],
    # Italian (opens with an expletive): "F*** off, and I heard that the Italian PM confessed he will eat spaghetti tonight"
    ["Vaffanculo e ho sentito dire che il PM italiano ha confessato che mangerà spaghetti stasera"]
]

class CustomModel(PreTrainedModel):
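    """Subjectivity classifier that fuses DeBERTa text features with sentiment.

    The pooled encoder representation is concatenated with three externally
    computed sentiment probabilities (positive, neutral, negative) before the
    final linear classification layer.
    """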
    config_class = DebertaV2Config

    def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.deberta = DebertaV2Model(config)
        self.pooler = ContextPooler(config)
        output_dim = self.pooler.output_dim
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)

    def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
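        # positive/neutral/negative arrive as shape-(batch,) float tensors;
        # token_type_ids and labels are accepted for API compatibility but unused.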
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        encoder_layer = outputs[0]
        pooled_output = self.pooler(encoder_layer)
        sentiment_features = torch.stack((positive, neutral, negative), dim=1).to(pooled_output.dtype)
        combined_features = torch.cat((pooled_output, sentiment_features), dim=1)
        logits = self.classifier(self.dropout(combined_features))
        return {'logits': logits}

def load_tokenizer(model_name: str):
    return AutoTokenizer.from_pretrained(model_name)
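
# Cache so each model is downloaded and instantiated only once per process.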
load_model_cache = {}
def load_model(model_name: str):
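    """Return a cached model, loading it on first use.

    Names containing 'sentiment' get the sentiment-aware CustomModel; all
    others use a plain sequence-classification head.
    """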
    if model_name not in load_model_cache:
        print(f"Loading model: {model_name}")
        if 'sentiment' in model_name:
            config = DebertaV2Config.from_pretrained(
                model_name, num_labels=2, id2label={0: 'OBJ', 1: 'SUBJ'}, label2id={'OBJ': 0, 'SUBJ': 1},
                output_attentions=False, output_hidden_states=False
            )
            # from_pretrained is a classmethod: call it on the class so the checkpoint
            # weights load into one instance (sentiment_dim=3 and num_labels=2 are the
            # __init__ defaults) instead of instantiating a throwaway model first.
            model_instance = CustomModel.from_pretrained(model_name, config=config)
        else:
            model_instance = AutoModelForSequenceClassification.from_pretrained(
                model_name, num_labels=2, id2label={0: 'OBJ', 1: 'SUBJ'}, label2id={'OBJ': 0, 'SUBJ': 1},
                output_attentions=False, output_hidden_states=False
            )
        load_model_cache[model_name] = model_instance
    return load_model_cache[model_name]

sentiment_pipeline_cache = None  # lazily-initialized sentiment-analysis pipeline
def get_sentiment_values(text: str):
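    """Return {'positive': ..., 'neutral': ..., 'negative': ...} scores for `text`.

    top_k=None makes the pipeline return scores for all three labels rather
    than only the best one.
    """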
    global sentiment_pipeline_cache
    if sentiment_pipeline_cache is None:
        print("Loading sentiment pipeline...")
        sentiment_pipeline_cache = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
            tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
            top_k=None
        )
    sentiments_output = sentiment_pipeline_cache(text)
    if sentiments_output and isinstance(sentiments_output, list) and sentiments_output[0]:
        sentiments = sentiments_output[0]
        return {s['label'].lower(): s['score'] for s in sentiments}
    return {}

def analyze(text):
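    """Run both subjectivity models on `text` and return rows for the results table.

    Rows: the three sentiment scores, the OBJ/SUBJ probabilities from the
    sentiment-aware model, then the OBJ/SUBJ probabilities from the text-only model.
    """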
    if not text or not text.strip():
        empty_data = [
            ["Positive", ""], ["Neutral", ""], ["Negative", ""],
            ["Sent-Subj OBJ", ""], ["Sent-Subj SUBJ", ""],
            ["TextOnly OBJ", ""], ["TextOnly SUBJ", ""]
        ]
        return empty_data
    sentiment_values = get_sentiment_values(text)
    tokenizer = load_tokenizer(model_card)
    model_with_sentiment = load_model(sentiment_model)
    model_without_sentiment = load_model(subjectivity_only_model)
    inputs_dict = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
    device = next(model_without_sentiment.parameters()).device
    inputs_dict_on_device = {k: v.to(device) for k, v in inputs_dict.items()}
    with torch.no_grad():
        outputs_base = model_without_sentiment(**inputs_dict_on_device)
    logits_base = outputs_base.get('logits')
    prob_base = torch.nn.functional.softmax(logits_base, dim=1)[0]
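    # Attach the three sentiment scores as the extra inputs CustomModel.forward
    # expects, each as a shape-(1,) float tensor on the model's device.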
    positive = sentiment_values.get('positive', 0.0)
    neutral = sentiment_values.get('neutral', 0.0)
    negative = sentiment_values.get('negative', 0.0)
    current_inputs_for_sentiment_model = inputs_dict_on_device.copy()
    current_inputs_for_sentiment_model['positive'] = torch.tensor(positive, device=device).unsqueeze(0).float()
    current_inputs_for_sentiment_model['neutral'] = torch.tensor(neutral, device=device).unsqueeze(0).float()
    current_inputs_for_sentiment_model['negative'] = torch.tensor(negative, device=device).unsqueeze(0).float()
    with torch.no_grad():
        outputs_sentiment = model_with_sentiment(**current_inputs_for_sentiment_model)
    logits_sentiment = outputs_sentiment.get('logits')
    prob_sentiment = torch.nn.functional.softmax(logits_sentiment, dim=1)[0]
    table_data = [
        ["Positive", f"{positive:.2%}"],
        ["Neutral", f"{neutral:.2%}"],
        ["Negative", f"{negative:.2%}"],
        ["Sent-Subj OBJ", f"{prob_sentiment[0]:.2%}"],
        ["Sent-Subj SUBJ", f"{prob_sentiment[1]:.2%}"],
        ["TextOnly OBJ", f"{prob_base[0]:.2%}"],
        ["TextOnly SUBJ", f"{prob_base[1]:.2%}"]
    ]
    return table_data

def load_default_example_on_startup():
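    """Pre-fill the textbox with the first example and its analysis at startup."""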
print("Loading default example on startup...")
if examples and examples[0] and isinstance(examples[0], list) and examples[0]:
default_text = examples[0][0]
default_analysis_results = analyze(default_text)
return default_text, default_analysis_results
print("Warning: No valid default example found. Loading empty.")
empty_text = ""
empty_results = analyze(empty_text)
return empty_text, empty_results
with gr.Blocks(theme=gr.themes.Ocean(), title="Subjectivity & Sentiment Dashboard") as demo:
gr.Markdown("# 🚀 Subjectivity & Sentiment Analysis Dashboard 🚀")
with gr.Column():
txt = gr.Textbox(
label="Enter text to analyze",
placeholder="Paste news sentence here...",
lines=2,
)
with gr.Row():
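            # Empty flexible column pushes the button to the right edge of the row.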
            gr.Column(scale=1, min_width=0)
            btn = gr.Button(
                "Analyze 🔍",
                variant="primary",
                size="md",
                scale=0
            )
    with gr.Tabs():
        with gr.TabItem("Raw Scores 📋"):
            table = gr.Dataframe(
                headers=["Metric", "Value"],
                datatype=["str", "str"],
                interactive=False
            )
        with gr.TabItem("About ℹ️"):
            gr.Markdown(
                "This dashboard uses two DeBERTa-based models (with and without sentiment integration) "
                "to detect subjectivity, alongside sentiment scores from an XLM-RoBERTa model."
            )
    with gr.Row():
        gr.Markdown("### Examples:")
    gr.Examples(
        examples=examples,
        inputs=txt,
        outputs=[table],
        fn=analyze,
        label="Click an example to analyze",
        cache_examples=True,
    )
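    # cache_examples=True runs analyze() on each example when the app is built,
    # so clicking an example fills the table from cache instead of recomputing.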
    btn.click(fn=analyze, inputs=txt, outputs=[table])

    demo.load(
        fn=load_default_example_on_startup,
        inputs=None,
        outputs=[txt, table]
    )

demo.queue().launch()