MatteoFasulo committed on
Commit
c2ae4ec
·
verified ·
1 Parent(s): 5f70d00

Update with class probabilities of models with and without sentiment

Browse files
Files changed (1) hide show
  1. app.py +89 -84
app.py CHANGED
@@ -5,128 +5,133 @@ from transformers.models.deberta.modeling_deberta import ContextPooler
5
  from transformers import pipeline
6
  import torch.nn as nn
7
 
8
- # Define the model and tokenizer
9
- model_card = "microsoft/mdeberta-v3-base"
10
- finetuned_model = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
 
11
  THRESHOLD = 0.65
12
 
13
- # Custom model class for combining sentiment analysis with subjectivity detection
14
  class CustomModel(PreTrainedModel):
15
  config_class = DebertaV2Config
16
 
17
- def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
18
  super().__init__(config, *args, **kwargs)
19
  self.deberta = DebertaV2Model(config)
20
  self.pooler = ContextPooler(config)
21
  output_dim = self.pooler.output_dim
22
  self.dropout = nn.Dropout(0.1)
23
-
24
  self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)
25
 
26
- def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
 
27
  outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
28
-
29
- encoder_layer = outputs[0]
30
- pooled_output = self.pooler(encoder_layer)
31
-
32
- # Sentiment features as a single tensor
33
- sentiment_features = torch.stack((positive, neutral, negative), dim=1) # Shape: (batch_size, 3)
34
-
35
- # Combine CLS embedding with sentiment features
36
- combined_features = torch.cat((pooled_output, sentiment_features), dim=1)
37
-
38
- # Classification head
39
- logits = self.classifier(self.dropout(combined_features))
40
-
41
- return {'logits': logits}
42
-
43
- # Load the pre-trained tokenizer
44
- def load_tokenizer(model_name: str):
45
- return AutoTokenizer.from_pretrained(model_name)
46
-
47
- # Load the pre-trained model
48
- def load_model(model_card: str, finetuned_model: str):
49
- tokenizer = AutoTokenizer.from_pretrained(model_card)
50
-
51
- config = DebertaV2Config.from_pretrained(
52
- finetuned_model,
53
  num_labels=2,
54
  id2label={0: 'OBJ', 1: 'SUBJ'},
55
  label2id={'OBJ': 0, 'SUBJ': 1},
56
  output_attentions=False,
57
  output_hidden_states=False
58
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- model = CustomModel(config=config, sentiment_dim=3, num_labels=2).from_pretrained(finetuned_model)
61
-
62
- return model
63
 
64
- # Get sentiment values using a pre-trained sentiment analysis model
65
- def get_sentiment_values(text: str):
66
- pipe = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment", tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment", top_k=None)
67
- sentiments = pipe(text)[0]
68
- return {k:v for k,v in [(list(sentiment.values())[0], list(sentiment.values())[1]) for sentiment in sentiments]}
69
 
70
- # Modify the predict_subjectivity function to return additional information
71
  def predict_subjectivity(text):
72
- sentiment_values = get_sentiment_values(text)
73
-
74
- model = load_model(model_card, finetuned_model)
75
- tokenizer = load_tokenizer(model_card)
76
-
77
- positive = sentiment_values['positive']
78
- neutral = sentiment_values['neutral']
79
- negative = sentiment_values['negative']
80
 
 
81
  inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
82
- inputs['positive'] = torch.tensor(positive).unsqueeze(0)
83
- inputs['neutral'] = torch.tensor(neutral).unsqueeze(0)
84
- inputs['negative'] = torch.tensor(negative).unsqueeze(0)
85
 
86
- outputs = model(**inputs)
87
- logits = outputs.get('logits')
88
-
89
- # Calculate probabilities using softmax
90
- probabilities = torch.nn.functional.softmax(logits, dim=1)
91
- obj_prob, subj_prob = probabilities[0].tolist()
92
-
93
- # Predict the class given the decision threshold
94
- predicted_class_idx = 1 if subj_prob >= THRESHOLD else 0
95
- predicted_class = model.config.id2label[predicted_class_idx]
 
96
 
97
- # Format the output
98
- result = f"""Prediction: {predicted_class}
 
 
 
 
99
 
100
- Class Probabilities:
101
- - Objective: {obj_prob:.2%}
102
- - Subjective: {subj_prob:.2%}
 
 
 
103
 
104
- Sentiment Scores:
105
- - Positive: {positive:.2%}
106
- - Neutral: {neutral:.2%}
107
- - Negative: {negative:.2%}"""
108
 
109
- return result
110
 
111
- # Update the Gradio interface
112
  demo = gr.Interface(
113
- fn=predict_subjectivity,
114
  inputs=gr.Textbox(
115
  label='Input sentence',
116
  placeholder='Enter a sentence from a news article',
117
- info='Paste a sentence from a news article to determine if it is subjective or objective.'
118
  ),
119
  outputs=gr.Textbox(
120
- label="Results",
121
- info="Detailed analysis including subjectivity prediction, class probabilities, and sentiment scores."
122
  ),
123
- title='Subjectivity Detection',
124
- description='Detect if a sentence is subjective or objective using a pre-trained model.',
125
- examples=[
126
- ['Nino Frassica, la moglie fuori controllo: "Fottiti in c***! Muori".'],
127
- ['Nino Frassica, la moglie fuori controllo dice fottiti in c***! Muori.'],
128
- ],
129
- cache_examples=True,
130
  )
131
 
132
  demo.launch()
 
5
  from transformers import pipeline
6
  import torch.nn as nn
7
 
8
# Model cards and thresholds
# Checkpoint the shared tokenizer is loaded from.
BASE_MODEL = "microsoft/mdeberta-v3-base"
# Fine-tuned checkpoint that is loaded with the three sentiment features enabled.
SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
# Fine-tuned checkpoint that is loaded without sentiment features (text only).
SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
# NOTE(review): the prediction code in this file reports raw class
# probabilities and does not read THRESHOLD - confirm whether it is still needed.
THRESHOLD = 0.65
13
 
14
# Custom model for subjectivity (+ optional sentiment features)
class CustomModel(PreTrainedModel):
    """DeBERTa-v2 encoder with a linear classification head.

    The head is applied to the pooled encoder output, optionally concatenated
    with three sentiment scores (positive, neutral, negative).

    Args:
        config: ``DebertaV2Config`` used to build the encoder and the pooler.
        sentiment_dim: Number of extra sentiment features appended to the
            pooled output before the classifier (0 disables them; ``forward``
            supplies exactly 3 when all sentiment inputs are given).
        num_labels: Number of output classes of the linear head.
    """

    config_class = DebertaV2Config

    def __init__(self, config, sentiment_dim=0, num_labels=2, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.deberta = DebertaV2Model(config)
        self.pooler = ContextPooler(config)
        output_dim = self.pooler.output_dim
        self.dropout = nn.Dropout(0.1)
        # The head width fixes how many sentiment features forward() must receive.
        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
                positive=None, neutral=None, negative=None):
        """Return the classification logits for a batch.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional attention mask passed to the encoder.
            token_type_ids: Accepted for call compatibility; it is not
                forwarded to the encoder.
            positive, neutral, negative: Optional sentiment scores, one value
                per batch item (assumed 1-D tensors of length batch_size).
                They are used only when all three are provided.

        Returns:
            The logits tensor produced by the linear head.

        Raises:
            ValueError: If the number of features reaching the head does not
                match the ``sentiment_dim`` the model was built with.
        """
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.pooler(outputs[0])

        if positive is not None and neutral is not None and negative is not None:
            sent_feats = torch.stack((positive, neutral, negative), dim=1)
            # Align dtype/device with the encoder output so cat + linear cannot
            # fail on e.g. float64 scores or a model living on another device.
            sent_feats = sent_feats.to(dtype=pooled.dtype, device=pooled.device)
            combined = torch.cat((pooled, sent_feats), dim=1)
        else:
            combined = pooled

        # Fail with an actionable message instead of an opaque matmul shape error
        # when sentiment inputs and the head's sentiment_dim disagree.
        expected = self.classifier.in_features
        if combined.shape[-1] != expected:
            raise ValueError(
                f"Classifier expects {expected} input features but received "
                f"{combined.shape[-1]}; pass positive/neutral/negative if and only if "
                "the model was built with sentiment_dim=3."
            )

        logits = self.classifier(self.dropout(combined))
        return logits
37
+
38
# Load tokenizer and model helper
def _load_subjectivity_model(checkpoint, sentiment_dim):
    """Load one fine-tuned ``CustomModel`` checkpoint with the given head width."""
    cfg = DebertaV2Config.from_pretrained(
        checkpoint,
        num_labels=2,
        id2label={0: 'OBJ', 1: 'SUBJ'},
        label2id={'OBJ': 0, 'SUBJ': 1},
        output_attentions=False,
        output_hidden_states=False,
    )
    # from_pretrained is a classmethod: calling it on an already-built instance
    # discards that instance and rebuilds the model with the constructor
    # defaults (sentiment_dim=0), which cannot hold the weights of a checkpoint
    # trained with sentiment features. Passing config and sentiment_dim here
    # forwards them to CustomModel.__init__ for the model that is actually loaded.
    return CustomModel.from_pretrained(checkpoint, config=cfg, sentiment_dim=sentiment_dim)


def load_models():
    """Load the shared tokenizer and both subjectivity classifiers.

    Returns:
        A tuple ``(tokenizer, model_with_sentiment, model_text_only)`` where
        the tokenizer comes from ``BASE_MODEL``, the first model is
        ``SENT_SUBJ_MODEL`` built with 3 sentiment features and the second is
        ``SUBJ_ONLY_MODEL`` built without sentiment features.
    """
    # Tokenizer shared by both classifiers
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    # Sentiment + subjectivity model
    model_with_sentiment = _load_subjectivity_model(SENT_SUBJ_MODEL, sentiment_dim=3)
    # Subjectivity-only model
    model_text_only = _load_subjectivity_model(SUBJ_ONLY_MODEL, sentiment_dim=0)
    return tokenizer, model_with_sentiment, model_text_only
65
+
66
# Sentiment pipeline
# One checkpoint supplies both the classifier weights and its tokenizer.
_SENTIMENT_CHECKPOINT = "cardiffnlp/twitter-xlm-roberta-base-sentiment"

# Built once at import time so every request reuses the same pipeline object.
sentiment_pipe = pipeline(
    task="sentiment-analysis",
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
    top_k=None,  # the caller needs the positive, neutral and negative scores
)
73
 
74
def get_sentiment_scores(text: str):
    """Return the sentiment scores of ``text`` keyed by label.

    Args:
        text: Sentence to score with the module-level ``sentiment_pipe``.

    Returns:
        A dict mapping each label reported by the pipeline to its score,
        e.g. ``{'positive': ..., 'neutral': ..., 'negative': ...}``.
    """
    results = sentiment_pipe(text)
    # For a single string the pipeline result is expected to be wrapped in an
    # outer list (one entry per input); unwrap it when that is the case.
    if results and isinstance(results[0], list):
        results = results[0]
    # Each entry is a {'label': ..., 'score': ...} dict. Read both fields by
    # name: taking the first key and first value would yield {'label': <name>}
    # and lose every score.
    return {entry['label']: entry['score'] for entry in results}
77
 
78
# Prediction function
# Caches models on first call
tokenizer, model_sent_subj, model_subj_only = None, None, None


def predict_subjectivity(text):
    """Score ``text`` with both subjectivity models and format a text report.

    The tokenizer and the two models are loaded on the first call and kept in
    module-level globals for later calls.

    Args:
        text: Sentence to analyse.

    Returns:
        A multi-line string with the sentiment scores followed by the OBJ/SUBJ
        probabilities of the sentiment-aware model and of the text-only model.
    """
    global tokenizer, model_sent_subj, model_subj_only
    if tokenizer is None:
        tokenizer, model_sent_subj, model_subj_only = load_models()

    # Tokenize input once; both models consume the same encoding.
    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
    input_ids = inputs['input_ids']
    attention_mask = inputs.get('attention_mask')

    # Sentiment scores feed the sentiment-aware model as extra features.
    sent_scores = get_sentiment_scores(text)
    pos, neu, neg = sent_scores['positive'], sent_scores['neutral'], sent_scores['negative']

    # Inference only: skip autograd bookkeeping for both forward passes.
    with torch.no_grad():
        # Sentiment + subjectivity model inference
        logits1 = model_sent_subj(
            input_ids=input_ids,
            attention_mask=attention_mask,
            positive=torch.tensor([pos]),
            neutral=torch.tensor([neu]),
            negative=torch.tensor([neg]),
        )
        # Subjectivity-only model inference
        logits2 = model_subj_only(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )

    # Index 0 is OBJ and index 1 is SUBJ (see the id2label mapping used at load time).
    obj1, subj1 = torch.softmax(logits1, dim=1)[0].tolist()
    obj2, subj2 = torch.softmax(logits2, dim=1)[0].tolist()

    # Formatting
    output = [
        "Sentiment Scores (sent-subj model):",
        f"- Positive: {pos:.2%}",
        f"- Neutral: {neu:.2%}",
        f"- Negative: {neg:.2%}\n",
        f"Subjectivity (with sentiment) - OBJ: {obj1:.2%}, SUBJ: {subj1:.2%}",
        f"Subjectivity (text only) - OBJ: {obj2:.2%}, SUBJ: {subj2:.2%}",
    ]
    return "\n".join(output)
120
 
121
# Build Gradio interface
# Input widget: a single free-text sentence.
_sentence_box = gr.Textbox(
    label='Input sentence',
    placeholder='Enter a sentence from a news article',
    info='Paste a sentence from a news article to determine subjectivity',
)

# Output widget: the plain-text report returned by predict_subjectivity.
_results_box = gr.Textbox(
    label='Results',
    info='Sentiment & dual-model subjectivity probabilities',
)

demo = gr.Interface(
    fn=predict_subjectivity,
    inputs=_sentence_box,
    outputs=_results_box,
    title='Dual-Model Subjectivity Detection',
    description='Outputs sentiment scores and class probabilities from two subjectivity models.',
)

demo.launch()