MatteoFasulo committed on
Commit
33a1f42
·
verified ·
1 Parent(s): c2ae4ec

Update with new layout

Browse files
Files changed (1) hide show
  1. app.py +92 -126
app.py CHANGED
@@ -5,133 +5,99 @@ from transformers.models.deberta.modeling_deberta import ContextPooler
5
  from transformers import pipeline
6
  import torch.nn as nn
7
 
8
# Model cards and thresholds
# Hub id of the base checkpoint; the shared tokenizer is loaded from it.
BASE_MODEL = "microsoft/mdeberta-v3-base"
# Hub id of the subjectivity checkpoint that also consumes sentiment features.
SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
# Hub id of the text-only subjectivity checkpoint.
SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
# NOTE(review): THRESHOLD is not referenced anywhere in the visible code;
# presumably the SUBJ decision cut-off -- confirm before relying on it.
THRESHOLD = 0.65
13
 
14
# Custom model for subjectivity (+ optional sentiment features)
class CustomModel(PreTrainedModel):
    """DeBERTa encoder with a linear head and optional sentiment features.

    The pooled encoder output is optionally concatenated with three sentiment
    scores (positive, neutral, negative) before the linear classifier.
    """

    config_class = DebertaV2Config

    def __init__(self, config, sentiment_dim=0, num_labels=2, *args, **kwargs):
        """Build encoder, pooler, dropout and classifier.

        Args:
            config: ``DebertaV2Config`` used to build the encoder and pooler.
            sentiment_dim: Number of extra features appended to the pooled
                output before classification (0 disables them).
            num_labels: Width of the classifier output.
        """
        super().__init__(config, *args, **kwargs)
        self.deberta = DebertaV2Model(config)
        self.pooler = ContextPooler(config)
        output_dim = self.pooler.output_dim
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
                positive=None, neutral=None, negative=None):
        """Return classification logits for a batch.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional attention mask passed to the encoder.
            token_type_ids: Accepted for tokenizer compatibility; it is not
                forwarded to the encoder.
            positive, neutral, negative: Optional 1-D tensors (one value per
                batch item) stacked into a 3-column feature matrix.

        Raises:
            ValueError: If the assembled feature width does not match what the
                classifier was built for (for example, sentiment scores were
                omitted for a model created with ``sentiment_dim=3``).
        """
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.pooler(outputs[0])

        sentiment = (positive, neutral, negative)
        if all(feature is not None for feature in sentiment):
            # Align dtype/device with the encoder output before concatenating.
            sent_feats = torch.stack(sentiment, dim=1).to(pooled)
            combined = torch.cat((pooled, sent_feats), dim=1)
        else:
            combined = pooled

        # Fail with an actionable message instead of an opaque matmul error.
        expected = self.classifier.in_features
        if combined.shape[-1] != expected:
            raise ValueError(
                f"classifier expects {expected} input features but got "
                f"{combined.shape[-1]}; pass positive/neutral/negative only "
                "to a model built with sentiment_dim=3"
            )

        logits = self.classifier(self.dropout(combined))
        return logits
37
-
38
# Load tokenizer and model helper
def load_models():
    """Load the shared tokenizer and both subjectivity models.

    Returns:
        A ``(tokenizer, sentiment_subjectivity_model, subjectivity_only_model)``
        tuple.
    """

    def _load(checkpoint, sentiment_dim):
        # Build the label-aware config, then load the checkpoint into a model
        # whose classifier width matches ``sentiment_dim``.
        cfg = DebertaV2Config.from_pretrained(
            checkpoint,
            num_labels=2,
            id2label={0: 'OBJ', 1: 'SUBJ'},
            label2id={'OBJ': 0, 'SUBJ': 1},
            output_attentions=False,
            output_hidden_states=False,
        )
        # ``from_pretrained`` is a classmethod: calling it on an instance
        # throws that instance away and rebuilds the model with default
        # arguments (sentiment_dim=0). With an explicit ``config`` the extra
        # keyword is forwarded to ``CustomModel.__init__`` instead.
        return CustomModel.from_pretrained(
            checkpoint, config=cfg, sentiment_dim=sentiment_dim
        )

    # Tokenizer shared by both models
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    # Sentiment+Subjectivity model (three extra sentiment features)
    model1 = _load(SENT_SUBJ_MODEL, sentiment_dim=3)
    # Subjectivity-only model
    model2 = _load(SUBJ_ONLY_MODEL, sentiment_dim=0)
    return tokenizer, model1, model2
65
-
66
# Sentiment pipeline
# top_k=None makes the pipeline return a score for every label.
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    top_k=None,
)


def get_sentiment_scores(text: str):
    """Return a ``{label: score}`` mapping for ``text``.

    The pipeline yields one ``{'label': ..., 'score': ...}`` dict per class.
    The previous implementation took the first key and first value of each
    dict, which produced ``{'label': '<label name>'}`` rather than scores.
    """
    results = sentiment_pipe(text)
    # A single string may come back wrapped in an outer list; unwrap it.
    if results and isinstance(results[0], list):
        results = results[0]
    # Lower-case labels so callers can rely on 'positive'/'neutral'/'negative'.
    return {entry['label'].lower(): entry['score'] for entry in results}
77
-
78
# Prediction function
# Models are loaded on the first call and cached in these module globals.
tokenizer, model_sent_subj, model_subj_only = None, None, None


def predict_subjectivity(text):
    """Return a text report of sentiment scores and subjectivity probabilities.

    Args:
        text: The sentence to classify.

    Returns:
        A multi-line string with the three sentiment scores followed by the
        OBJ/SUBJ probabilities of both models, or a short prompt when ``text``
        is empty or whitespace only.
    """
    global tokenizer, model_sent_subj, model_subj_only

    # Nothing to classify: skip model loading and inference entirely.
    if not text or not text.strip():
        return "Please enter a sentence to analyze."

    if tokenizer is None:
        tokenizer, model_sent_subj, model_subj_only = load_models()

    # Tokenize input
    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')

    # Sentiment features for the sentiment-aware model
    sent_scores = get_sentiment_scores(text)
    pos, neu, neg = sent_scores['positive'], sent_scores['neutral'], sent_scores['negative']

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits1 = model_sent_subj(
            input_ids=inputs['input_ids'],
            attention_mask=inputs.get('attention_mask'),
            positive=torch.tensor([pos]),
            neutral=torch.tensor([neu]),
            negative=torch.tensor([neg]),
        )
        logits2 = model_subj_only(
            input_ids=inputs['input_ids'],
            attention_mask=inputs.get('attention_mask'),
        )

    # Plain floats format cleanly with the percent spec.
    probs1 = torch.softmax(logits1, dim=1)[0].tolist()
    probs2 = torch.softmax(logits2, dim=1)[0].tolist()

    # Formatting
    output = [
        "Sentiment Scores (sent-subj model):",
        f"- Positive: {pos:.2%}",
        f"- Neutral: {neu:.2%}",
        f"- Negative: {neg:.2%}\n",
        f"Subjectivity (with sentiment) - OBJ: {probs1[0]:.2%}, SUBJ: {probs1[1]:.2%}",
        f"Subjectivity (text only) - OBJ: {probs2[0]:.2%}, SUBJ: {probs2[1]:.2%}",
    ]
    return "\n".join(output)
120
-
121
# Build Gradio interface
# Input widget: a single free-text sentence.
_input_box = gr.Textbox(
    label='Input sentence',
    placeholder='Enter a sentence from a news article',
    info='Paste a sentence from a news article to determine subjectivity',
)

# Output widget: the formatted report returned by predict_subjectivity.
_output_box = gr.Textbox(
    label='Results',
    info='Sentiment & dual-model subjectivity probabilities',
)

demo = gr.Interface(
    fn=predict_subjectivity,
    inputs=_input_box,
    outputs=_output_box,
    title='Dual-Model Subjectivity Detection',
    description='Outputs sentiment scores and class probabilities from two subjectivity models.',
)

demo.launch()
 
5
  from transformers import pipeline
6
  import torch.nn as nn
7
 
8
# -- Model definitions
# Hub id of the base checkpoint; the shared tokenizer is loaded from it.
BASE_MODEL = "microsoft/mdeberta-v3-base"
# Hub id of the subjectivity checkpoint that also consumes sentiment features.
SENT_SUBJ_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-sentiment-multilingual-no-arabic"
# Hub id of the text-only subjectivity checkpoint.
SUBJ_ONLY_MODEL = "MatteoFasulo/mdeberta-v3-base-subjectivity-multilingual-no-arabic"
 
12
 
13
+ # -- Custom model builder
14
+ from functools import partial
15
+
16
def build_custom_model(sentiment_dim=0):
    """Create a ``CustomModel`` class bound to a fixed number of sentiment features.

    Binding ``sentiment_dim`` in a closure lets ``from_pretrained`` rebuild the
    model with the right classifier width without extra constructor arguments.

    Args:
        sentiment_dim: 0 for a text-only model, or 3 to append the
            positive/neutral/negative scores to the pooled output.

    Returns:
        A ``PreTrainedModel`` subclass.

    Raises:
        ValueError: If ``sentiment_dim`` is neither 0 nor 3. ``forward`` always
            stacks exactly three features, so no other width can work.
    """
    feature_names = ('positive', 'neutral', 'negative')
    if sentiment_dim not in (0, len(feature_names)):
        raise ValueError(
            f"sentiment_dim must be 0 or {len(feature_names)}, got {sentiment_dim!r}"
        )

    class CustomModel(PreTrainedModel):
        """DeBERTa encoder, context pooler and linear classification head."""

        config_class = DebertaV2Config

        def __init__(self, config, *args, **kwargs):
            super().__init__(config, *args, **kwargs)
            self.deberta = DebertaV2Model(config)
            self.pooler = ContextPooler(config)
            self.dropout = nn.Dropout(0.1)
            hidden_dim = self.pooler.output_dim + sentiment_dim
            self.classifier = nn.Linear(hidden_dim, config.num_labels)

        def forward(self, input_ids, attention_mask=None, **sent_kwargs):
            """Return logits; sentiment tensors are read from ``sent_kwargs``."""
            x = self.deberta(input_ids=input_ids, attention_mask=attention_mask)[0]
            pooled = self.pooler(x)
            if sentiment_dim:
                missing = [name for name in feature_names if sent_kwargs.get(name) is None]
                if missing:
                    # Clearer than the bare KeyError the dict lookup would raise.
                    raise ValueError(
                        "missing sentiment feature(s): " + ", ".join(missing)
                    )
                sent_feats = torch.stack(
                    [sent_kwargs[name] for name in feature_names], dim=1
                ).to(pooled)  # match the encoder output's dtype and device
                pooled = torch.cat((pooled, sent_feats), dim=1)
            return self.classifier(self.dropout(pooled))

    return CustomModel
34
+
35
# -- Load models and tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)


def _subjectivity_config(checkpoint):
    # Both checkpoints share the same two-class OBJ/SUBJ label mapping.
    return DebertaV2Config.from_pretrained(
        checkpoint,
        num_labels=2,
        id2label={0: 'OBJ', 1: 'SUBJ'},
        label2id={'OBJ': 0, 'SUBJ': 1},
    )


# sentiment+subjectivity
cfg1 = _subjectivity_config(SENT_SUBJ_MODEL)
Model1Cls = build_custom_model(sentiment_dim=3)
# ignore_mismatched_sizes is deliberately not set: it would silently replace a
# mis-sized classifier head with random weights, so the demo would show
# meaningless probabilities. A shape mismatch should fail loudly at startup.
model1 = Model1Cls.from_pretrained(SENT_SUBJ_MODEL, config=cfg1)

# subjectivity-only
cfg2 = _subjectivity_config(SUBJ_ONLY_MODEL)
Model2Cls = build_custom_model(sentiment_dim=0)
model2 = Model2Cls.from_pretrained(SUBJ_ONLY_MODEL, config=cfg2)

# sentiment pipeline (top_k=None returns a score for every label)
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    top_k=None,
)
50
+
51
def get_sentiment_scores(text):
    """Return a ``{label: score}`` mapping for ``text``.

    The pipeline yields one ``{'label': ..., 'score': ...}`` dict per class.
    The previous implementation took the first key and first value of each
    dict, which produced ``{'label': '<label name>'}`` rather than scores.
    """
    out = sentiment_pipe(text)
    # A single string may come back wrapped in an outer list; unwrap it.
    if out and isinstance(out[0], list):
        out = out[0]
    # Lower-case labels so callers can rely on 'positive'/'neutral'/'negative'.
    return {entry['label'].lower(): entry['score'] for entry in out}
54
+
55
# -- Prediction logic
def analyze(text):
    """Score ``text`` with the sentiment pipeline and both subjectivity models.

    The click handler is wired to two outputs (bar chart and table), so this
    returns one value per output. The previous version returned a single dict
    of strings, which matches neither component.

    Args:
        text: The sentence to analyze.

    Returns:
        A ``(chart_data, table_rows)`` tuple: a DataFrame with ``category`` and
        ``value`` columns (the columns the bar plot is configured with), and a
        list of ``[metric, percentage-string]`` rows for the table.
    """
    # Local import keeps this block self-contained.
    import pandas as pd

    # Tokenize
    inputs = tokenizer(text, truncation=True, padding=True, max_length=256, return_tensors='pt')

    # Sentiment
    scores = get_sentiment_scores(text)
    pos, neu, neg = scores['positive'], scores['neutral'], scores['negative']

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits1 = model1(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            positive=torch.tensor([pos]),
            neutral=torch.tensor([neu]),
            negative=torch.tensor([neg]),
        )
        logits2 = model2(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask)

    p1 = torch.softmax(logits1, dim=1)[0].tolist()
    p2 = torch.softmax(logits2, dim=1)[0].tolist()

    # Build results: numeric values for the chart, formatted text for the table.
    metrics = [
        ('Positive', pos), ('Neutral', neu), ('Negative', neg),
        ('Sent-Subj OBJ', p1[0]), ('Sent-Subj SUBJ', p1[1]),
        ('TextOnly OBJ', p2[0]), ('TextOnly SUBJ', p2[1]),
    ]
    chart_data = pd.DataFrame(metrics, columns=['category', 'value'])
    table_rows = [[name, f"{value:.2%}"] for name, value in metrics]
    return chart_data, table_rows
74
+
75
# -- Build Gradio Dashboard with Blocks
# gr.themes has no ``Dark`` class (light/dark is a display mode, not a theme),
# so ``gr.themes.Dark()`` raised AttributeError at startup. The variable name
# is kept so other references to it keep working.
dark_theme = gr.themes.Base()

with gr.Blocks(theme=dark_theme, css="""
#result_table td { padding: 8px; font-size: 1rem; }
#header { text-align: center; font-size: 2rem; font-weight: bold; margin-bottom: 10px; }
""") as demo:
    gr.Markdown("<div id='header'>🚀 Advanced Subjectivity & Sentiment Dashboard 🚀</div>")
    with gr.Row():
        txt = gr.Textbox(label="Enter text to analyze", placeholder="Paste news sentence here...", lines=2)
        btn = gr.Button("Analyze 🔍", variant="primary")
    with gr.Tabs():
        with gr.TabItem("Overview 📊"):
            chart = gr.BarPlot(x="category", y="value", label="Results", elem_id="result_chart")
        with gr.TabItem("Raw Scores 📋"):
            table = gr.Dataframe(headers=["Metric", "Value"], datatype=["str", "str"], interactive=False, elem_id="result_table")
        with gr.TabItem("About ℹ️"):
            gr.Markdown("This dashboard uses two DeBERTa-based models (with and without sentiment integration) to detect subjectivity, alongside sentiment scores from an XLM-RoBERTa model.")
            gr.Markdown("**Threshold** for subjective classification is adjustable in code (default: 0.65). Feel free to fork and customize! 🚀")
    # Link inputs to outputs
    btn.click(fn=analyze, inputs=txt, outputs=[chart, table])
    # The former ``btn.js_on_event(...)`` confetti hook was removed: Gradio
    # components have no such method, so it crashed while building the UI.

# -- Launch
# share=True is dropped: it is not supported on Hugging Face Spaces, and when
# run locally it would open a public tunnel to this machine.
demo.queue().launch(server_name="0.0.0.0")