import gradio as gr
import torch
import torch.nn as nn
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DebertaV2Config,
    DebertaV2Model,
    PreTrainedModel,
    pipeline,
)
# ContextPooler is imported from the DeBERTa-v2 module to match DebertaV2Model below.
from transformers.models.deberta_v2.modeling_deberta_v2 import ContextPooler

# Model identifiers: the base card (used for the tokenizer) and the two fine-tuned subjectivity checkpoints
model_card = "microsoft/mdeberta-v3-base"
subjectivity_only_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
sentiment_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
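# Note: subjectivity_only_model is a standard sequence classifier (loaded via
# AutoModelForSequenceClassification below), while sentiment_model additionally
# consumes three sentiment scores through the CustomModel head.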

# Examples for the Gradio interface (pre-computed via cache_examples so clicking one displays instantly)
examples = [
    ["But then Trump came to power and sidelined the defense hawks, ushering in a dramatic shift in Republican sentiment toward America's allies and adversaries."],
    ["Boxing Day ambush & flagship attack Putin has long tried to downplay the true losses his army has faced in the Black Sea."],
    ["Ho sentito dire che il PM italiano ha confessato che mangerà spaghetti stasera"],
    ["Sono arrabbiato e ho sentito dire che il PM italiano ha confessato che mangerà spaghetti stasera"],
    ["Vaffanculo e ho sentito dire che il PM italiano ha confessato che mangerà spaghetti stasera"]
]
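# The last three examples are Italian, with increasing emotional charge; roughly:
# "I heard that the Italian PM confessed he will eat spaghetti tonight", prefixed
# in turn by "I'm angry and..." and by an expletive.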

class CustomModel(PreTrainedModel):
    """DeBERTa-v2 encoder whose pooled representation is concatenated with three
    sentiment scores (positive/neutral/negative) before the classification head."""
    config_class = DebertaV2Config

    def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.deberta = DebertaV2Model(config)
        self.pooler = ContextPooler(config)
        output_dim = self.pooler.output_dim
        self.dropout = nn.Dropout(0.1)
        # The classifier sees the pooled text features plus the sentiment scores.
        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)

    def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        encoder_layer = outputs[0]  # last hidden state
        pooled_output = self.pooler(encoder_layer)
        # Stack the three per-example scores into a (batch, 3) feature block.
        sentiment_features = torch.stack((positive, neutral, negative), dim=1).to(pooled_output.dtype)
        combined_features = torch.cat((pooled_output, sentiment_features), dim=1)
        logits = self.classifier(self.dropout(combined_features))
        return {'logits': logits}
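# Shape sketch for a single input (illustrative sizes): input_ids (1, seq_len) ->
# pooled_output (1, hidden_size); positive/neutral/negative (1,) each ->
# sentiment_features (1, 3); combined_features (1, hidden_size + 3) -> logits (1, 2).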

def load_tokenizer(model_name: str):
    # analyze() loads this once from the base model card and reuses it for both checkpoints.
    return AutoTokenizer.from_pretrained(model_name)

load_model_cache = {}
def load_model(model_name: str):
    if model_name not in load_model_cache:
        print(f"Loading model: {model_name}")
        if 'sentiment' in model_name:
            config = DebertaV2Config.from_pretrained(
                model_name, num_labels=2, id2label={0: 'OBJ', 1: 'SUBJ'}, label2id={'OBJ': 0, 'SUBJ': 1},
                output_attentions=False, output_hidden_states=False
            )
            # from_pretrained is a classmethod: call it on the class so the checkpoint
            # weights are loaded into the custom head, rather than constructing a
            # throwaway instance first.
            model_instance = CustomModel.from_pretrained(model_name, config=config, sentiment_dim=3, num_labels=2)
        else:
            model_instance = AutoModelForSequenceClassification.from_pretrained(
                model_name, num_labels=2, id2label={0: 'OBJ', 1: 'SUBJ'}, label2id={'OBJ': 0, 'SUBJ': 1},
                output_attentions=False, output_hidden_states=False
            )
        model_instance.eval()  # inference only; disables dropout
        load_model_cache[model_name] = model_instance
    return load_model_cache[model_name]
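# The in-process cache means each checkpoint is downloaded and initialized once
# per process, not once per request.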

sentiment_pipeline_cache = None
def get_sentiment_values(text: str):
    """Score `text` with the multilingual XLM-R sentiment model and return a
    {label: score} dict with lowercase keys (positive/neutral/negative)."""
    global sentiment_pipeline_cache
    if sentiment_pipeline_cache is None:
        print("Loading sentiment pipeline...")
        sentiment_pipeline_cache = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
            tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
            top_k=None  # return scores for all labels, not just the top one
        )
    sentiments_output = sentiment_pipeline_cache(text)
    if sentiments_output and isinstance(sentiments_output, list) and sentiments_output[0]:
        sentiments = sentiments_output[0]
        return {s['label'].lower(): s['score'] for s in sentiments}
    return {}
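# Illustrative return value (scores are made up):
#   {'positive': 0.08, 'neutral': 0.85, 'negative': 0.07}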


def analyze(text):
    if not text or not text.strip():
        # Blank input: return an empty table skeleton instead of running the models.
        empty_data = [
            ["Positive", ""], ["Neutral", ""], ["Negative", ""],
            ["Sent-Subj OBJ", ""], ["Sent-Subj SUBJ", ""],
            ["TextOnly OBJ", ""], ["TextOnly SUBJ", ""]
        ]
        return empty_data

    sentiment_values = get_sentiment_values(text)
    tokenizer = load_tokenizer(model_card)
    model_with_sentiment = load_model(sentiment_model)
    model_without_sentiment = load_model(subjectivity_only_model)

    # Tokenize once; both models share the same tokenizer and inputs.
    inputs_dict = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')

    device = next(model_without_sentiment.parameters()).device
    inputs_dict_on_device = {k: v.to(device) for k, v in inputs_dict.items()}

    # Text-only subjectivity model.
    with torch.no_grad():
        outputs_base = model_without_sentiment(**inputs_dict_on_device)
    logits_base = outputs_base.get('logits')
    prob_base = torch.nn.functional.softmax(logits_base, dim=1)[0]

    # Missing labels default to 0.0 (e.g. if the sentiment pipeline returned nothing).
    positive = sentiment_values.get('positive', 0.0)
    neutral = sentiment_values.get('neutral', 0.0)
    negative = sentiment_values.get('negative', 0.0)

    # Sentiment-augmented model: pass the three scores as (1,)-shaped float tensors
    # alongside the usual tokenizer outputs.
    current_inputs_for_sentiment_model = inputs_dict_on_device.copy()
    current_inputs_for_sentiment_model['positive'] = torch.tensor(positive, device=device).unsqueeze(0).float()
    current_inputs_for_sentiment_model['neutral'] = torch.tensor(neutral, device=device).unsqueeze(0).float()
    current_inputs_for_sentiment_model['negative'] = torch.tensor(negative, device=device).unsqueeze(0).float()

    with torch.no_grad():
        outputs_sentiment = model_with_sentiment(**current_inputs_for_sentiment_model)
    logits_sentiment = outputs_sentiment.get('logits')
    prob_sentiment = torch.nn.functional.softmax(logits_sentiment, dim=1)[0]

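    # Rows 1-3: raw sentiment scores; rows 4-5: the sentiment-augmented model's
    # OBJ/SUBJ probabilities; rows 6-7: the text-only model's probabilities.
    # Each OBJ/SUBJ pair sums to 100% since it comes from a softmax over two logits.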
    table_data = [
        ["Positive", f"{positive:.2%}"],
        ["Neutral", f"{neutral:.2%}"],
        ["Negative", f"{negative:.2%}"],
        ["Sent-Subj OBJ", f"{prob_sentiment[0]:.2%}"],
        ["Sent-Subj SUBJ", f"{prob_sentiment[1]:.2%}"],
        ["TextOnly OBJ", f"{prob_base[0]:.2%}"],
        ["TextOnly SUBJ", f"{prob_base[1]:.2%}"]
    ]
    return table_data

def load_default_example_on_startup():
    print("Loading default example on startup...")
    if examples and examples[0] and isinstance(examples[0], list) and examples[0]:
        default_text = examples[0][0]
        default_analysis_results = analyze(default_text)
        return default_text, default_analysis_results
    print("Warning: No valid default example found. Loading empty.")
    empty_text = ""
    empty_results = analyze(empty_text) 
    return empty_text, empty_results

with gr.Blocks(theme=gr.themes.Ocean(), title="Subjectivity & Sentiment Dashboard") as demo:
    gr.Markdown("# 🚀 Subjectivity & Sentiment Analysis Dashboard 🚀")

    with gr.Column():
        txt = gr.Textbox(
            label="Enter text to analyze",
            placeholder="Paste news sentence here...",
            lines=2,
        )
        with gr.Row():
            gr.Column(scale=1, min_width=0)  # empty flexible column pushes the button to the right
            btn = gr.Button(
                "Analyze 🔍",
                variant="primary",
                size="md",
                scale=0
            )

    with gr.Tabs():
        with gr.TabItem("Raw Scores 📋"):
            table = gr.Dataframe(
                headers=["Metric", "Value"],
                datatype=["str", "str"],
                interactive=False
            )
        with gr.TabItem("About ℹ️"):
            gr.Markdown(
                "This dashboard uses two DeBERTa-based models (with and without sentiment integration) "
                "to detect subjectivity, alongside sentiment scores from an XLM-RoBERTa model."
            )

    with gr.Row():
        gr.Markdown("### Examples:")

    
    # With cache_examples=True, analyze() runs once per example at startup and
    # clicking an example displays the stored results instead of re-running the models.
    gr.Examples(
        examples=examples,
        inputs=txt,
        outputs=[table],
        fn=analyze,
        label="Click an example to analyze",
        cache_examples=True,
    )

    btn.click(fn=analyze, inputs=txt, outputs=[table])

    
    # Pre-fill the textbox and the results table with the first example on page load.
    demo.load(
        fn=load_default_example_on_startup,
        inputs=None,
        outputs=[txt, table]
    )

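# queue() enables Gradio's request queue, so long-running analyses from
# concurrent visitors are processed in order rather than timing out.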
demo.queue().launch()